[clang] [llvm] AMDGPU: Add builtin/intrinsic global_(load|store)_b128 (PR #172090)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 13:58:48 PST 2025
https://github.com/macurtis-amd created https://github.com/llvm/llvm-project/pull/172090
Add clang builtins and associated LLVM intrinsics for scoped loads/stores of 128 bits.
New builtins:
1. `__builtin_amdgcn_global_load_b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/docs/LanguageExtensions.rst#__builtin_amdgcn_global_load_b128-and-__builtin_amdgcn_global_store_b128), [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl))
2. `__builtin_amdgcn_global_store_b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/docs/LanguageExtensions.rst#__builtin_amdgcn_global_load_b128-and-__builtin_amdgcn_global_store_b128), [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl))
And corresponding intrinsics:
1. `llvm.amdgcn.global.load.b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/docs/AMDGPUUsage.rst) - search for intrinsic name, [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll) )
2. `llvm.amdgcn.global.store.b128` ([documentation](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/docs/AMDGPUUsage.rst) - search for intrinsic name, [test/examples](https://github.com/macurtis-amd/llvm-project/blob/global-load-store-b128/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll) )
These will initially be used by [RCCL](https://github.com/ROCm/rccl) to address some low-level performance issues.
>From 9bb8bb30a20f3d5267d80254b935ae45266caf19 Mon Sep 17 00:00:00 2001
From: "Curtis, Matthew" <Matthew.Curtis at amd.com>
Date: Thu, 16 Oct 2025 05:26:52 -0500
Subject: [PATCH] AMDGPU: Add builtin/intrinsic global_(load|store)_b128
---
clang/docs/LanguageExtensions.rst | 37 +
clang/include/clang/Basic/BuiltinsAMDGPU.def | 3 +
clang/include/clang/Sema/SemaAMDGPU.h | 2 +
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 20 +
clang/lib/Sema/SemaAMDGPU.cpp | 16 +
.../builtins-amdgcn-global-load-store.cl | 113 +
...builtins-amdgcn-global-load-store-error.cl | 22 +
...s-amdgcn-global-load-store-target-error.cl | 26 +
llvm/docs/AMDGPUUsage.rst | 106 +
.../CodeGen/GlobalISel/GIMatchTableExecutor.h | 6 +
.../GlobalISel/GIMatchTableExecutorImpl.h | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 25 +
llvm/lib/IR/Verifier.cpp | 33 +-
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 3 +
llvm/lib/Target/AMDGPU/FLATInstructions.td | 15 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 22 +
.../AMDGPU/llvm.amdgcn.global.load.b128.ll | 30869 ++++++++++++++++
.../AMDGPU/llvm.amdgcn.global.store.b128.ll | 3888 ++
.../CodeGen/AMDGPU/unsupported-global-load.ll | 36 +
.../AMDGPU/unsupported-global-store.ll | 36 +
llvm/test/Verifier/amdgpu-intrinsics.ll | 66 +
.../GlobalISel/GlobalISelMatchTable.cpp | 17 +
.../Common/GlobalISel/GlobalISelMatchTable.h | 18 +
23 files changed, 35385 insertions(+), 3 deletions(-)
create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl
create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl
create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll
create mode 100644 llvm/test/Verifier/amdgpu-intrinsics.ll
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index c4b86b203d383..4d4d6ca3fe0bd 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -5243,6 +5243,43 @@ returns the bit at the position of the current lane. It is almost equivalent to
``(mask & (1 << lane_id)) != 0``, except that its behavior is only defined if
the given mask has the same value for all active lanes of the current wave.
+
+__builtin_amdgcn_global_load_b128 and __builtin_amdgcn_global_store_b128
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Signature:
+
+.. code-block:: c
+
+ typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u;
+ typedef v4u __attribute__((address_space(1))) *global_ptr_to_v4u;
+
+ v4u __builtin_amdgcn_global_load_b128(
+ v4u __attribute__((address_space(1))) *src,
+ const char *scope);
+
+ void __builtin_amdgcn_global_store_b128(
+ v4u __attribute__((address_space(1))) *dst,
+ v4u data,
+ const char *scope);
+
+Load or store a vector of 4 unsigned integers from or to global memory with
+cache behavior specified by ``scope``, which must be a string literal.
+
+Valid values for ``scope`` are:
+
+* ``"wavefront"``
+* ``"workgroup"``
+* ``"agent"``
+* ``""`` (empty string)
+
+These builtins are supported on gfx9, gfx10, gfx11, and gfx12 targets.
+
+They map to the llvm intrinsics ``llvm.amdgcn.global.load.b128`` and
+``llvm.amdgcn.global.store.b128`` documented in `User Guide for AMDGPU Backend
+<https://llvm.org/docs/AMDGPUUsage.html>`_.
+
+
ARM/AArch64 Language Extensions
-------------------------------
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a867144d83928..4bc5b1c16f2ad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -190,6 +190,9 @@ TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "",
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "", "vmem-to-lds-load-insts")
TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "", "vmem-to-lds-load-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_b128, "V4UiV4Ui*1cC*", "n", "gfx9-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_store_b128, "vV4Ui*1V4UicC*", "n", "gfx9-insts")
+
//===----------------------------------------------------------------------===//
// Ballot builtins.
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index bac812a9d4fcf..556bfb705de67 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -28,6 +28,8 @@ class SemaAMDGPU : public SemaBase {
bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore);
+ bool checkScopedMemAccessFunctionCall(CallExpr *TheCall);
+
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs);
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index eabdc370da6b4..384f76e092252 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -885,6 +885,26 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
return Builder.CreateCall(F, {Args});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_b128:
+ case AMDGPU::BI__builtin_amdgcn_global_store_b128: {
+ const bool IsStore =
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_global_store_b128;
+ LLVMContext &Ctx = CGM.getLLVMContext();
+ SmallVector<Value *, 5> Args = {EmitScalarExpr(E->getArg(0))}; // addr
+ if (IsStore)
+ Args.push_back(EmitScalarExpr(E->getArg(1))); // data
+ const unsigned ScopeIdx = E->getNumArgs() - 1;
+ StringRef ScopeLit =
+ cast<StringLiteral>(E->getArg(ScopeIdx)->IgnoreParenCasts())
+ ->getString();
+ llvm::MDNode *MD =
+ llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeLit)});
+ Args.push_back(llvm::MetadataAsValue::get(Ctx, MD)); // scope
+ llvm::Function *F =
+ CGM.getIntrinsic(IsStore ? Intrinsic::amdgcn_global_store_b128
+ : Intrinsic::amdgcn_global_load_b128);
+ return Builder.CreateCall(F, Args);
+ }
case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
{llvm::Type::getInt64Ty(getLLVMContext())});
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index cece22092bb14..72c7bf03f93ad 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -255,6 +255,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_b128:
+ case AMDGPU::BI__builtin_amdgcn_global_store_b128:
+ return checkScopedMemAccessFunctionCall(TheCall);
default:
return false;
}
@@ -344,6 +347,19 @@ bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
return Fail;
}
+bool SemaAMDGPU::checkScopedMemAccessFunctionCall(CallExpr *TheCall) {
+ bool Fail = false;
+ // Last argument is a string literal
+ Expr *Arg = TheCall->getArg(TheCall->getNumArgs() - 1);
+ auto Scope = dyn_cast<StringLiteral>(Arg->IgnoreParenCasts());
+ if (!Scope) {
+ Fail = true;
+ Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
+ << Arg->getSourceRange();
+ }
+ return Fail;
+}
+
bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs) {
assert(NumDataArgs <= 2);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl
new file mode 100644
index 0000000000000..7ffceead747e8
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl
@@ -0,0 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals smart
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX950
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX9_4_GENERIC
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX1250
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx12-generic -emit-llvm -o - %s | FileCheck %s -check-prefixes=GFX,GFX12_GENERIC
+
+typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32;
+typedef v4u32 __global *global_ptr_to_v4u32;
+
+//------------------------------------------------------------------------------
+// Store
+//------------------------------------------------------------------------------
+// GFX-LABEL: @test_amdgcn_global_store_b128_00(
+// GFX-NEXT: entry:
+// GFX-NEXT: tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META4:![0-9]+]])
+// GFX-NEXT: ret void
+//
+void test_amdgcn_global_store_b128_00(global_ptr_to_v4u32 ptr, v4u32 data) {
+ __builtin_amdgcn_global_store_b128(ptr, data, "wavefront");
+}
+
+// GFX-LABEL: @test_amdgcn_global_store_b128_01(
+// GFX-NEXT: entry:
+// GFX-NEXT: tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META5:![0-9]+]])
+// GFX-NEXT: ret void
+//
+void test_amdgcn_global_store_b128_01(global_ptr_to_v4u32 ptr, v4u32 data) {
+ __builtin_amdgcn_global_store_b128(ptr, data, "workgroup");
+}
+
+// GFX-LABEL: @test_amdgcn_global_store_b128_10(
+// GFX-NEXT: entry:
+// GFX-NEXT: tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META6:![0-9]+]])
+// GFX-NEXT: ret void
+//
+void test_amdgcn_global_store_b128_10(global_ptr_to_v4u32 ptr, v4u32 data) {
+ __builtin_amdgcn_global_store_b128(ptr, data, "agent");
+}
+
+// GFX-LABEL: @test_amdgcn_global_store_b128_11(
+// GFX-NEXT: entry:
+// GFX-NEXT: tail call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META7:![0-9]+]])
+// GFX-NEXT: ret void
+//
+void test_amdgcn_global_store_b128_11(global_ptr_to_v4u32 ptr, v4u32 data) {
+ __builtin_amdgcn_global_store_b128(ptr, data, "");
+}
+
+//------------------------------------------------------------------------------
+// Load
+//------------------------------------------------------------------------------
+// GFX-LABEL: @test_amdgcn_global_load_b128_00(
+// GFX-NEXT: entry:
+// GFX-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META4]])
+// GFX-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_00(global_ptr_to_v4u32 ptr) {
+ return __builtin_amdgcn_global_load_b128(ptr, "wavefront");
+}
+
+// GFX-LABEL: @test_amdgcn_global_load_b128_01(
+// GFX-NEXT: entry:
+// GFX-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META5]])
+// GFX-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_01(global_ptr_to_v4u32 ptr) {
+ return __builtin_amdgcn_global_load_b128(ptr, "workgroup");
+}
+
+// GFX-LABEL: @test_amdgcn_global_load_b128_10(
+// GFX-NEXT: entry:
+// GFX-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META6]])
+// GFX-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_10(global_ptr_to_v4u32 ptr) {
+ return __builtin_amdgcn_global_load_b128(ptr, "agent");
+}
+
+// GFX-LABEL: @test_amdgcn_global_load_b128_11(
+// GFX-NEXT: entry:
+// GFX-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) [[PTR:%.*]], metadata [[META7]])
+// GFX-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 test_amdgcn_global_load_b128_11(global_ptr_to_v4u32 ptr) {
+ return __builtin_amdgcn_global_load_b128(ptr, "");
+}
+//.
+// GFX950: [[META4]] = !{!"wavefront"}
+// GFX950: [[META5]] = !{!"workgroup"}
+// GFX950: [[META6]] = !{!"agent"}
+// GFX950: [[META7]] = !{!""}
+//.
+// GFX9_4_GENERIC: [[META4]] = !{!"wavefront"}
+// GFX9_4_GENERIC: [[META5]] = !{!"workgroup"}
+// GFX9_4_GENERIC: [[META6]] = !{!"agent"}
+// GFX9_4_GENERIC: [[META7]] = !{!""}
+//.
+// GFX1250: [[META4]] = !{!"wavefront"}
+// GFX1250: [[META5]] = !{!"workgroup"}
+// GFX1250: [[META6]] = !{!"agent"}
+// GFX1250: [[META7]] = !{!""}
+//.
+// GFX12_GENERIC: [[META4]] = !{!"wavefront"}
+// GFX12_GENERIC: [[META5]] = !{!"workgroup"}
+// GFX12_GENERIC: [[META6]] = !{!"agent"}
+// GFX12_GENERIC: [[META7]] = !{!""}
+//.
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// GFX1250: {{.*}}
+// GFX12_GENERIC: {{.*}}
+// GFX950: {{.*}}
+// GFX9_4_GENERIC: {{.*}}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl
new file mode 100644
index 0000000000000..b21b604baa944
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx9-4-generic -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32;
+typedef v4u32 __global *global_ptr_to_v4u32;
+
+void test_amdgcn_global_store_b128_00(v4u32 *ptr, v4u32 data, const char* scope) {
+ __builtin_amdgcn_global_store_b128(ptr, data, ""); //expected-error{{passing '__private v4u32 *__private' to parameter of type '__attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int __global *' changes address space of pointer}}
+}
+
+void test_amdgcn_global_store_b128_01(global_ptr_to_v4u32 ptr, v4u32 data, const char* scope) {
+ __builtin_amdgcn_global_store_b128(ptr, data, scope); //expected-error{{expression is not a string literal}}
+}
+
+v4u32 test_amdgcn_global_load_b128_00(v4u32 *ptr, const char* scope) {
+ return __builtin_amdgcn_global_load_b128(ptr, ""); //expected-error{{passing '__private v4u32 *__private' to parameter of type '__attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int __global *' changes address space of pointer}}
+}
+
+v4u32 test_amdgcn_global_load_b128_01(global_ptr_to_v4u32 ptr, const char* scope) {
+ return __builtin_amdgcn_global_load_b128(ptr, scope); //expected-error{{expression is not a string literal}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl
new file mode 100644
index 0000000000000..ec357c58ef903
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl
@@ -0,0 +1,26 @@
+// We test loads and stores separately because clang seems to exit after only
+// the first 'target feature' error.
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_LOAD -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_LOAD -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_LOAD -S -verify -o - %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_STORE -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_STORE -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_STORE -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32;
+typedef v4u32 __global *global_ptr_to_v4u32;
+
+#ifdef TEST_LOAD
+v4u32 test_amdgcn_global_load_b128_01(global_ptr_to_v4u32 ptr, const char* scope) {
+ return __builtin_amdgcn_global_load_b128(ptr, ""); // expected-error{{'__builtin_amdgcn_global_load_b128' needs target feature gfx9-insts}}
+}
+#endif
+
+#ifdef TEST_STORE
+void test_amdgcn_global_store_b128_01(global_ptr_to_v4u32 ptr, v4u32 data, const char* scope) {
+ __builtin_amdgcn_global_store_b128(ptr, data, ""); // expected-error{{'__builtin_amdgcn_global_store_b128' needs target feature gfx9-insts}}
+}
+#endif
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 7ecf1c1124894..39afd29737156 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1596,6 +1596,112 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
* 1 - Data cache.
Instruction cache prefetches are unsafe on invalid address.
+
+ llvm.amdgcn.global.load.b128 This intrinsic is supported on gfx9, gfx10, gfx11, and gfx12 targets.
+
+ Signature:
+
+ .. code-block:: llvm
+
+ <4 x i32> @llvm.amdgcn.global.load.b128(
+ ptr addrspace(1), ; source
+ metadata) ; scope - e.g. '!0' where '!0 = !{!"wavefront"}'
+
+ Reads the value from the source address with cache behavior specified by the scope.
+
+ The following table shows the mapping between valid scope values and target
+ instruction flags or field values.
+
+ ============== ========================== ========================== ========================== ========================== ==========================
+ targets instruction ``"wavefront"`` ``"workgroup"`` ``"agent"`` ``""`` (empty string)
+ ============== ========================== ========================== ========================== ========================== ==========================
+ gfx90* ``global_load_dwordx4`` ``glc`` ``glc``
+
+ gfx942, gfx950 ``global_load_dwordx4`` (wave) ``sc0`` (group) ``sc1`` (device) ``sc0 sc1`` (system)
+
+ gfx10* ``global_load_dwordx4`` ``glc`` ``glc dlc`` ``glc dlc``
+
+ gfx11* ``global_load_dwordx4`` ``glc`` ``glc`` ``glc``
+
+ gfx120* ``global_load_b128`` (CU) ``scope:SCOPE_SE`` (SE) ``scope:SCOPE_DEV`` (DEV) ``scope:SCOPE_SYS`` (SYS)
+
+ gfx125* ``global_load_b128`` (CU) ``scope:SCOPE_DEV`` (DEV) ``scope:SCOPE_SYS`` (SYS)
+ ============== ========================== ========================== ========================== ========================== ==========================
+
+ For gfx90*, see "GLC Bit Explained" in the appropriate instruction set reference
+ (e.g. Chapter 9.1.10 in "AMD Instinct MI100" Instruction Set Architecture Reference
+ Guide).
+
+ For gfx942 and gfx950 targets, see "Memory Scope and Temporal Controls" in the
+ appropriate instruction set reference (e.g. Chapter 9.1.10.2 in the "AMD Instinct
+ MI300" Instruction Set Architecture Reference Guide).
+
+ For gfx10* targets, see "GLC, DLC and SLC Bit Explained" in the appropriate
+ instruction set reference (e.g. Chapter 8.1.10 in "RDNA 2" Instruction Set Architecture
+ Reference Guide).
+
+ For gfx11* targets, see "Cache Controls: SLC, GLC and DLC" in the appropriate
+ instruction set reference (e.g. Chapter 4.1.1 in "RDNA3" Instruction Set Architecture
+ Reference Guide).
+
+ For gfx12* targets, see "Cache Controls: SCOPE and Temporal-Hint" in the
+ appropriate instruction set reference (e.g. Chapter 4.1.1 in the "RDNA4"
+ Instruction Set Architecture Reference Guide).
+
+
+ llvm.amdgcn.global.store.b128 This intrinsic is supported on gfx9, gfx10, gfx11, and gfx12 targets.
+
+ Signature:
+
+ .. code-block:: llvm
+
+ void @llvm.amdgcn.global.store.b128(
+ ptr addrspace(1), ; destination
+ <4 x i32>, ; value
+ metadata) ; scope - e.g. '!0' where '!0 = !{!"wavefront"}'
+
+ Writes the value to the destination address with cache
+ behavior specified by the scope.
+
+ The following table shows the mapping between valid scope values and target
+ instruction flags or field values.
+
+ ============== ========================== ========================== ========================== ========================== ==========================
+ targets instruction ``"wavefront"`` ``"workgroup"`` ``"agent"`` ``""`` (empty string)
+ ============== ========================== ========================== ========================== ========================== ==========================
+ gfx90* ``global_store_dwordx4``
+
+ gfx942, gfx950 ``global_store_dwordx4`` (wave) ``sc0`` (group) ``sc1`` (device) ``sc0 sc1`` (system)
+
+ gfx10* ``global_store_dwordx4``
+
+ gfx11* ``global_store_dwordx4``
+
+ gfx120* ``global_store_b128`` (CU) ``scope:SCOPE_SE`` (SE) ``scope:SCOPE_DEV`` (DEV) ``scope:SCOPE_SYS`` (SYS)
+
+ gfx125* ``global_store_b128`` (CU) ``scope:SCOPE_DEV`` (DEV) ``scope:SCOPE_SYS`` (SYS)
+ ============== ========================== ========================== ========================== ========================== ==========================
+
+ For gfx90*, see "GLC Bit Explained" in the appropriate instruction set reference
+ (e.g. Chapter 9.1.10 in "AMD Instinct MI100" Instruction Set Architecture Reference
+ Guide).
+
+ For gfx942 and gfx950 targets, see "Memory Scope and Temporal Controls" in the
+ appropriate instruction set reference (e.g. Chapter 9.1.10.2 in the "AMD Instinct
+ MI300" Instruction Set Architecture Reference Guide).
+
+ For gfx10* targets, see "GLC, DLC and SLC Bit Explained" in the appropriate
+ instruction set reference (e.g. Chapter 8.1.10 in "RDNA 2" Instruction Set
+ Architecture Reference Guide).
+
+ For gfx11* targets, see "Cache Controls: SLC, GLC and DLC" in the appropriate
+ instruction set reference (e.g. Chapter 4.1.1 in "RDNA3" Instruction Set
+ Architecture Reference Guide).
+
+ For gfx12* targets, see "Cache Controls: SCOPE and Temporal-Hint" in the
+ appropriate instruction set reference (e.g. Chapter 4.1.1 in the "RDNA4"
+ Instruction Set Architecture Reference Guide).
+
============================================== ==========================================================
.. TODO::
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 3a2509345b776..f21923827039c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -246,6 +246,12 @@ enum {
/// - SizeInBits(ULEB128) - The size of the pointer value in bits.
GIM_CheckPointerToAny,
+ /// Check the machine type of the specified operand
+ /// - InsnID(ULEB128) - Instruction ID
+ /// - OpIdx(ULEB128) - Operand index
+ /// - MachineOperandType(ULEB128) - Expected type
+ GIM_CheckMachineOperandType,
+
/// Check the register bank for the specified operand
/// - InsnID(ULEB128) - Instruction ID
/// - OpIdx(ULEB128) - Operand index
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
index a50a0a04fe2bc..b6b2b14c94f66 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
@@ -768,6 +768,15 @@ bool GIMatchTableExecutor::executeMatchTable(
break;
}
+ case GIM_CheckMachineOperandType: {
+ uint64_t InsnID = readULEB();
+ uint64_t OpIdx = readULEB();
+ uint64_t MOTy = readULEB();
+ MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+ if (MO.getType() != MOTy)
+ return false;
+ break;
+ }
case GIM_RecordNamedOperand: {
uint64_t InsnID = readULEB();
uint64_t OpIdx = readULEB();
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 19d5f24c5d5e0..4780e143bbb7b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -906,6 +906,31 @@ def int_amdgcn_bitop3 :
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
+class AMDGPUGlobalStore : Intrinsic <
+ [],
+ [global_ptr_ty, // Base global pointer to store to
+ llvm_v4i32_ty, // Data to store
+ llvm_metadata_ty], // Scope
+ [ IntrWriteMem, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+ IntrWillReturn, IntrNoCallback, IntrNoFree ],
+ "",
+ [SDNPMemOperand, SDNPMayStore]
+>;
+
+def int_amdgcn_global_store_b128 : AMDGPUGlobalStore;
+
+class AMDGPUGlobalLoad : Intrinsic <
+ [llvm_v4i32_ty],
+ [global_ptr_ty, // Base global pointer to load from
+ llvm_metadata_ty], // Scope
+ [ IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, IntrWillReturn,
+ IntrNoCallback, IntrNoFree ],
+ "",
+ [SDNPMemOperand, SDNPMayLoad]
+>;
+
+def int_amdgcn_global_load_b128 : AMDGPUGlobalLoad;
+
} // TargetPrefix = "amdgcn"
// New-style image intrinsics
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 543c26dfe25e0..91dd61ff8da05 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5755,6 +5755,15 @@ void Verifier::visitInstruction(Instruction &I) {
InstsInThisBlock.insert(&I);
}
+inline MDString *getMetadataValueAsString(MetadataAsValue *MDV) {
+ if (!MDV)
+ return nullptr;
+ auto *MD = dyn_cast<MDTuple>(MDV->getMetadata());
+ if (!MD || MD->getNumOperands() != 1)
+ return nullptr;
+ return dyn_cast<MDString>(MD->getOperand(0));
+}
+
/// Allow intrinsics to be verified in different ways.
void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Function *IF = Call.getCalledFunction();
@@ -6966,14 +6975,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
&Call, PtrArg);
// Last argument must be a MD string
- auto *Op = cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
- MDNode *MD = cast<MDNode>(Op->getMetadata());
- Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
+ auto *Op =
+ dyn_cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
+ Check(getMetadataValueAsString(Op) != nullptr,
"cooperative atomic intrinsics require that the last argument is a "
"metadata string",
&Call, Op);
break;
}
+ case Intrinsic::amdgcn_global_load_b128:
+ case Intrinsic::amdgcn_global_store_b128: {
+ auto *Op =
+ dyn_cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
+ MDString *MDStr = getMetadataValueAsString(Op);
+ Check(MDStr != nullptr,
+ "global load/store intrinsics require that the last argument is a "
+ "metadata string",
+ &Call, Op);
+
+ StringRef Scope = MDStr->getString();
+ Check(Scope == "" || Scope == "agent" || Scope == "workgroup" ||
+ Scope == "wavefront",
+ "'" + Scope +
+ "' is not a valid scope for global load/store intrinsics",
+ &Call, Op);
+ break;
+ }
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
Value *V = Call.getArgOperand(0);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 266c708f48737..d9e17e2606eab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5628,6 +5628,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_flat_prefetch:
case Intrinsic::amdgcn_global_prefetch:
return getDefaultMappingVOP(MI);
+ case Intrinsic::amdgcn_global_load_b128:
+ case Intrinsic::amdgcn_global_store_b128:
+ return getDefaultMappingAllVGPR(MI);
default:
return getInvalidInstructionMapping();
}
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9e38af91c7ccf..56cbb7284b227 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1826,6 +1826,21 @@ multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
}
}
+class LoadIntrinWithScopeMetadata<SDPatternOperator intrin> : PatFrag<
+ (ops node:$ptr),
+ (intrin $ptr, srcvalue)>;
+def global_load_b128_intrin_pat : LoadIntrinWithScopeMetadata<int_amdgcn_global_load_b128>;
+
+class StoreIntrinWithScopeMetadata<SDPatternOperator intrin> : PatFrag<
+ (ops node:$data, node:$ptr),
+ (intrin $ptr, $data, srcvalue)>;
+def global_store_b128_intrin_pat : StoreIntrinWithScopeMetadata<int_amdgcn_global_store_b128>;
+
+let SubtargetPredicate = HasFlatGlobalInsts in {
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, global_load_b128_intrin_pat, v4i32>;
+defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, global_store_b128_intrin_pat, v4i32>;
+}
+
multiclass GlobalFLATStorePats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
def : FlatStoreSignedPat<!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
let AddedComplexity = 10;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 101fefcc4574b..8a01f91fb75cf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1649,6 +1649,26 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
return true;
}
+ case Intrinsic::amdgcn_global_load_b128:
+ case Intrinsic::amdgcn_global_store_b128: {
+ bool IsStore = IntrID == Intrinsic::amdgcn_global_store_b128;
+ Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = EVT::getIntegerVT(CI.getContext(), 128);
+ Info.ptrVal = CI.getArgOperand(0);
+ Info.flags |=
+ IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ // Pretend to be atomic so that SIMemoryLegalizer::expandLoad/expandStore
+ // set cache flags appropriately.
+ Info.order = AtomicOrdering::Monotonic;
+
+ LLVMContext &Ctx = CI.getContext();
+ unsigned ScopeIdx = CI.arg_size() - 1;
+ MDNode *ScopeMD = cast<MDNode>(
+ cast<MetadataAsValue>(CI.getArgOperand(ScopeIdx))->getMetadata());
+ StringRef Scope = cast<MDString>(ScopeMD->getOperand(0))->getString();
+ Info.ssid = Ctx.getOrInsertSyncScopeID(Scope);
+ return true;
+ }
case Intrinsic::amdgcn_load_to_lds:
case Intrinsic::amdgcn_global_load_lds: {
Info.opc = ISD::INTRINSIC_VOID;
@@ -1753,6 +1773,8 @@ bool SITargetLowering::getAddrModeArguments(const IntrinsicInst *II,
case Intrinsic::amdgcn_global_store_async_from_lds_b32:
case Intrinsic::amdgcn_global_store_async_from_lds_b64:
case Intrinsic::amdgcn_global_store_async_from_lds_b128:
+ case Intrinsic::amdgcn_global_load_b128:
+ case Intrinsic::amdgcn_global_store_b128:
Ptr = II->getArgOperand(0);
break;
case Intrinsic::amdgcn_load_to_lds:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll
new file mode 100644
index 0000000000000..9fde46b1f8239
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.b128.ll
@@ -0,0 +1,30869 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX906-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX908-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX90A-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-4-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX942-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX950-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-1-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1012-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-3-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx11-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX11-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1250-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx12-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX12-GENERIC-SDAG %s
+
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX906-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX908-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX90A-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-4-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX942-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX950-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-1-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1012-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-3-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx11-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX11-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1250-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx12-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX12-GENERIC-ISEL %s
+
+
+;;==============================================================================
+;; A few basic test cases
+;;==============================================================================
+define <4 x i32> @global_load_b128_0_00(ptr addrspace(1) %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_00:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_00:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_00:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_00:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_00:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_00:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_00:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_00:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_00:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_00:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_00:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_00:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_00:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_00:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_00:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_00:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0)
+ ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_0_01(ptr addrspace(1) %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_01:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_01:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_01:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_01:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_01:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_01:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_01:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_01:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_01:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_01:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_01:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_01:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_01:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_01:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_01:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_01:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !1)
+ ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_0_10(ptr addrspace(1) %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_10:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_10:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_10:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_10:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_10:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_10:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_10:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_10:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_10:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_10:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_10:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_10:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_10:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_10:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_10:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_10:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+ ret <4 x i32> %data
+}
+
+; Calls llvm.amdgcn.global.load.b128 on %addr (not inreg) with metadata !3 and
+; returns the loaded <4 x i32>. Per the autogenerated checks below, the load is
+; expected to carry glc (GFX9 and GFX11 runs), sc0 sc1 (GFX9-4, GFX942, GFX950),
+; glc dlc (GFX10 runs) or scope SCOPE_SYS (GFX12 and GFX1250 runs).
+; NOTE(review) - !3 is defined outside this excerpt, presumably the scope
+; operand of the intrinsic; confirm against the metadata at the end of the file.
+define <4 x i32> @global_load_b128_0_11(ptr addrspace(1) %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_0_11:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_0_11:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_0_11:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_0_11:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_0_11:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_0_11:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_0_11:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_0_11:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_0_11:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_0_11:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_0_11:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_0_11:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_0_11:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_0_11:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_0_11:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_0_11:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3)
+ ret <4 x i32> %data
+}
+
+; Calls llvm.amdgcn.global.load.b128 on an inreg %addr with metadata !0 and
+; returns the loaded <4 x i32>. Per the autogenerated checks below, every run
+; is expected to zero v0 and issue the load with v0 plus an SGPR pair as the
+; address (s[16:17] or s[0:1] depending on the run), with no cache-policy or
+; scope modifier on the load.
+; NOTE(review) - !0 is defined outside this excerpt, presumably the scope
+; operand of the intrinsic; confirm against the metadata at the end of the file.
+define <4 x i32> @global_load_b128_saddr_0_00(ptr addrspace(1) inreg %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_00:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_00:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0)
+ ret <4 x i32> %data
+}
+
+; Calls llvm.amdgcn.global.load.b128 on an inreg %addr with metadata !1 and
+; returns the loaded <4 x i32>. Per the autogenerated checks below, the load
+; uses a zeroed v0 plus an SGPR pair as the address and is expected to carry
+; no modifier on the GFX9-GENERIC, GFX906, GFX908, GFX90A and GFX1250 runs,
+; sc0 on GFX9-4, GFX942 and GFX950, glc on the GFX10 and GFX11 runs, and
+; scope SCOPE_SE on the GFX12-GENERIC runs.
+; NOTE(review) - !1 is defined outside this excerpt, presumably the scope
+; operand of the intrinsic; confirm against the metadata at the end of the file.
+define <4 x i32> @global_load_b128_saddr_0_01(ptr addrspace(1) inreg %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_01:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_01:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !1)
+ ret <4 x i32> %data
+}
+
+define <4 x i32> @global_load_b128_saddr_0_02(ptr addrspace(1) inreg %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_02:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_02:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+ ret <4 x i32> %data
+}
+
+;; Loads 128 bits through an inreg global pointer using metadata !3 (defined
+;; outside this excerpt). Every expected sequence keeps the base in an SGPR
+;; pair and supplies a zeroed v0 as the VGPR offset; the GFX12-GENERIC and
+;; GFX1250 expectations carry scope:SCOPE_SYS.
+define <4 x i32> @global_load_b128_saddr_0_03(ptr addrspace(1) inreg %addr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX90A-SDAG: ; %bb.0: ; %entry
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_b128_saddr_0_03:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX90A-ISEL: ; %bb.0: ; %entry
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_b128_saddr_0_03:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3)
+ ret <4 x i32> %data
+}
+
+;;==============================================================================
+;; Signed offset addressing modes (derived from global-saddr-load.ll) {
+;;==============================================================================
+;;------------------------------------------------------------------------------
+;; No vgpr offset, constants
+;;------------------------------------------------------------------------------
+
+;; base only
+;; The pointer is not inreg; the expected loads address it as v[0:1] with
+;; "off" and no immediate offset. With metadata !0 (defined outside this
+;; excerpt) none of the expected loads carries a cache-policy or scope
+;; modifier. The "i8" in the name does not describe the access: the call is a
+;; 128-bit load whose <4 x i32> result is bitcast to <4 x float>.
+define <4 x float> @global_load_i8_offset_0(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %sbase, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx9 immediate offset
+;; Loads 128 bits from %sbase + 4095 bytes using metadata !1 (defined outside
+;; this excerpt). Expected results, as recorded in the checks below:
+;;  - GFX9, GFX11, GFX12-GENERIC and GFX1250 fold 4095 into offset:4095.
+;;  - The GFX10 expectations do not fold it whole: the SDAG checks add 0x800
+;;    to the address and keep offset:2047, while the ISEL checks add the full
+;;    0xfff and use no immediate offset.
+;;  - Modifiers differ per target: none on GFX9-GENERIC/906/908/90A, sc0 on
+;;    GFX9-4-GENERIC/942/950, glc on GFX10 and GFX11, scope:SCOPE_SE on
+;;    GFX12-GENERIC, and none on GFX1250.
+define <4 x float> @global_load_i8_offset_4095(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_4095:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_4095:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx9 immediate offset + 1 (4096) - the gfx9/gfx10/gfx11 checks add 0x1000 to the base, the gfx12/gfx1250 checks fold offset:4096 into the load
+define <4 x float> @global_load_i8_offset_4096(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096 ; address is %sbase plus 4096 bytes
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; !2 is defined outside this excerpt (presumably the sync scope); the gfx12 and gfx1250 checks expect scope:SCOPE_DEV
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; the loaded <4 x i32> is returned reinterpreted as <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx9 immediate offset + 2 (4097) - on gfx9/gfx10/gfx11 the SDAG checks add 0x1000 and keep offset:1 while the ISEL checks add 0x1001; the gfx12/gfx1250 checks fold offset:4097 into the load
+define <4 x float> @global_load_i8_offset_4097(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_4097:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1001, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1001, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_4097:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4097 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097 ; address is %sbase plus 4097 bytes
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3) ; !3 is defined outside this excerpt (presumably the sync scope); the gfx12 and gfx1250 checks expect scope:SCOPE_SYS
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; the loaded <4 x i32> is returned reinterpreted as <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum negative gfx9 immediate offset
+define <4 x float> @global_load_i8_offset_neg4096(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum negative gfx9 immediate offset (-4096) minus 1: pre-gfx12 -SDAG checks split it into a -4096 add plus offset:-1, -ISEL checks add the full -4097
+define <4 x float> @global_load_i8_offset_neg4097(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4097:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4097:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum negative gfx9 immediate offset (-4096) minus 2: pre-gfx12 -SDAG checks split it into a -4096 add plus offset:-2, -ISEL checks add the full -4098
+define <4 x float> @global_load_i8_offset_neg4098(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg4098:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffeffe, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffeffe, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg4098:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4098 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with one past the maximum gfx10 immediate offset (2048 is added to the base instead of folded in the gfx10 checks)
+define <4 x float> @global_load_i8_offset_2048(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx10 immediate offset (2047) + 2; gfx10 must add to the address first. Uses scope metadata !0.
+define <4 x float> @global_load_i8_offset_2049(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_2049:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2049
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x801, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x801, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x801, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_2049:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2049
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum gfx10 immediate offset (2047) + 3; gfx10 must add to the address first. Uses scope metadata !1.
+define <4 x float> @global_load_i8_offset_2050(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_2050:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2050 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x802, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x802, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x802, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_2050:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2050 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; base with maximum negative gfx10 immediate offset
+define <4 x float> @global_load_i8_offset_neg2048(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Base plus (most negative gfx10 immediate offset - 1), i.e. -2049: the gfx10 checks need extra v_add_co address adds; loaded with scope metadata !3.
+define <4 x float> @global_load_i8_offset_neg2049(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2049:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2049:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Base plus (most negative gfx10 immediate offset - 2), i.e. -2050: the gfx10 checks need extra v_add_co address adds; loaded with scope metadata !0 (no cache-scope bits in the checks).
+define <4 x float> @global_load_i8_offset_neg2050(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg2050:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2050
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7fe, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg2050:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2050
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x7FFFFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+; Loads 128 bits through @llvm.amdgcn.global.load.b128 from %sbase plus a
+; constant byte offset of -8388608 (-2^23), using scope metadata !2 (defined
+; outside this function), and returns the result bitcast to <4 x float>.
+;
+; What the checks below expect (the SDAG and ISEL prefixes of each target
+; expect the same instruction sequence in this function) -
+;   * GFX9, GFX9-4, GFX10 and GFX11 targets do not fold the offset; it is added
+;     to the 64-bit address with an add / add-with-carry pair (0xff800000 into
+;     the low half, -1 into the high half) and the load uses plain "off".
+;   * GFX1250 and GFX12-GENERIC fold it into the load as offset:-8388608.
+;   * The cache/scope modifiers on the load are glc (GFX9, GFX11), sc1 (GFX9-4,
+;     GFX942, GFX950), glc dlc (GFX10) and scope:SCOPE_DEV (GFX1250, GFX12).
+;
+; NOTE(review) - the function name says 0xFFFFFF, but the GEP constant is
+; -8388608, whose 24-bit pattern is 0x800000; confirm the name is intentional.
+define <4 x float> @global_load_i8_offset_0xFFFFFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+; Loads 128 bits through @llvm.amdgcn.global.load.b128 from %sbase plus a
+; constant byte offset of 4294967295 (0xFFFFFFFF), using scope metadata !3
+; (defined outside this function), and returns the result bitcast to
+; <4 x float>.
+;
+; What the checks below expect -
+;   * SDAG prefixes split the constant between a 64-bit add and the load's
+;     immediate offset. GFX9, GFX9-4 and GFX11 add 0xfffff000 and use
+;     offset:4095; GFX10 adds 0xfffff800 and uses offset:2047; GFX1250 and
+;     GFX12-GENERIC add 0xff800000 and use offset:8388607. Each pair sums to
+;     0xFFFFFFFF, with 0 (plus carry) added to the high half.
+;   * ISEL prefixes add the whole constant instead (-1 into the low half, 0 plus
+;     carry into the high half) and emit the load with no immediate offset.
+;   * The cache/scope modifiers on the load are glc (GFX9, GFX11), sc0 sc1
+;     (GFX9-4, GFX942, GFX950), glc dlc (GFX10) and scope:SCOPE_SYS (GFX1250,
+;     GFX12).
+define <4 x float> @global_load_i8_offset_0xFFFFFFFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100000000(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000000:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000000:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100000001(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000001:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000001:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297 ; byte offset 0x100000001 (2^32 + 1); per the checks above the high dword is always bumped by 1 through an add / add-with-carry pair, the -SDAG runs keep the low 1 as offset:1 on the load, and the -ISEL runs add it into v0 and use no offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1) ; !1 is defined outside this excerpt; NOTE(review): presumably the scope operand, given the sc0 / glc / scope:SCOPE_SE modifiers some targets show in the checks - confirm
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100000FFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100000FFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391 ; byte offset 0x100000FFF (2^32 + 4095); per the checks above the -SDAG runs keep offset:4095 on the load except the GFX10 ones, which split it as an add of 0x800 plus offset:2047, while the -ISEL runs add 0xfff into v0 and use no offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; !2 is defined outside this excerpt; NOTE(review): presumably the scope operand, given the glc / sc1 / glc dlc / scope:SCOPE_DEV modifiers in the checks - confirm
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_0x100001000(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_0x100001000:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_0x100001000:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4095
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_neg0x100000000(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_u32_e32 v1, -1, v1
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_nc_u32_e32 v1, -1, v1
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000000:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_i8_offset_neg0x100000001(ptr addrspace(1) %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -2, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -2, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_neg0x100000001:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -2, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Basic addressing patterns
+;;------------------------------------------------------------------------------
+
+;; Basic pattern: base pointer plus zero-extended 32-bit offset, no immediate offset.
+define <4 x float> @global_load_i8_zext_vgpr(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum positive immediate offset on gfx9 (4095).
+define <4 x float> @global_load_i8_zext_vgpr_offset_4095(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx9 + 1
+;; Loads 128 bits from %sbase + zext(%voffset) + 4096 (metadata operand !1,
+;; defined outside this excerpt). In the checks below every GFX9*, GFX10* and
+;; GFX11* prefix adds 0x1000 to the address with a separate add/add-with-carry
+;; pair and uses no immediate offset, while the GFX1250* and GFX12-GENERIC*
+;; prefixes keep offset:4096 on the load. The checked loads carry sc0 on
+;; GFX9-4-GENERIC/GFX942/GFX950, glc on the GFX10* and GFX11* prefixes,
+;; scope:SCOPE_SE on GFX12-GENERIC*, and no such modifier on
+;; GFX9-GENERIC/GFX906/GFX908/GFX90A and GFX1250*.
+define <4 x float> @global_load_i8_zext_vgpr_offset_4096(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4096
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx9
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4096 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4096
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; One byte below the maximum negative offset on gfx9 (-4096 - 1 = -4097); the gfx9 checks no longer carry the whole constant in the instruction offset
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4097 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64 ; zero-extend the 32-bit %voffset argument to 64 bits
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; byte address %sbase + zext(%voffset)
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4097 ; the constant byte offset under test
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3) ; NOTE(review): !3 is defined outside this chunk; the checks above show glc, sc0 sc1 or scope:SCOPE_SYS for it
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded bits as the <4 x float> return type
+ ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx10
+define <4 x float> @global_load_i8_zext_vgpr_offset_2047(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2047
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; One past gfx10's maximum positive immediate offset (2047): the gfx10 targets add 0x800 to the address instead of using offset:2048.
+define <4 x float> @global_load_i8_zext_vgpr_offset_2048(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2048 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:2048 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048 ; constant byte offset under test
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1) ; NOTE(review): !1 is defined outside this chunk; presumably the scope operand -- confirm
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx10
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2048 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2048 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; One byte below the most negative offset gfx10 folds into the instruction:
+;; the preceding test's gfx10 checks encode offset:-2048 directly, this test
+;; uses -2049. The gfx10 checks below therefore show an extra 64-bit add: the
+;; SDAG prefixes add 0xfffff800 and keep offset:-1, the ISEL prefixes add
+;; 0xfffff7ff and use no immediate. Every other target here folds offset:-2049.
+define <4 x float> @global_load_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2049 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2049 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;; Address = %sbase + zext(%voffset) - 2049, built from two byte-wise GEPs.
+;; !3 is defined outside this function; the cache-policy operands seen in the
+;; checks above (glc, sc0 sc1, glc dlc, SCOPE_SYS) presumably follow from it --
+;; TODO confirm against the metadata definition.
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2049
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx12 (0x7FFFFF = 8388607 bytes).
+;; The gfx12 checks fold it into the instruction as offset:8388607. The gfx9,
+;; gfx10 and gfx11 checks need an extra 64-bit add instead: the SDAG prefixes
+;; split the constant (e.g. 0x7ff000 plus offset:4095), the ISEL prefixes add
+;; the full 0x7fffff and use no immediate.
+define <4 x float> @global_load_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;; Address = %sbase + zext(%voffset) + 8388607, built from two byte-wise GEPs.
+;; The zext lives here in the body (not on the define line) to match the
+;; layout of the other offset tests in this file.
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Minimum offset on gfx12: the constant -8388608 (-0x800000) is added after the zero-extended VGPR offset; the "0xFFFFFF" in the name is not the literal offset.
+define <4 x float> @global_load_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+
+;; Maximum positive offset on gfx9 (4095); the constant GEP precedes the variable-offset GEP, so the immediate needs to be moved lower.
+define <4 x float> @global_load_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Pointer addressing done in integers: the base pointer is converted with
+;; ptrtoint, the zero-extended i32 offset is added as the second operand of an
+;; i64 add, and the sum is converted back to a global pointer with inttoptr
+;; before being passed to llvm.amdgcn.global.load.b128.
+;; NOTE(review): the function name says "i8", but the load returns <4 x i32>
+;; (128 bits) -- confirm the name is intentional.
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add = add i64 %sbase.as.int, %zext.offset
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of addressing expression: the address is built with
+;; ptrtoint/add/inttoptr, and the zero-extended i32 offset is the first operand
+;; of the i64 add. In the checks below, the SelectionDAG output keeps the offset
+;; register first in the add (v2, v0 / v[2:3], v[0:1]) while the GlobalISel
+;; output uses v0, v2.
+;; NOTE(review): the function name says "i8", but the load returns <4 x i32>
+;; (128 bits) -- confirm the name is intentional.
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add = add i64 %zext.offset, %sbase.as.int
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of addressing expression, with immediate offset
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add = add i64 %zext.offset, %sbase.as.int
+ %add.immoffset = add i64 %add, 128
+ %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; zext(%voffset) is forced to the LHS of the final add, and the +128 immediate is added to the base first (non-canonical position); the checks expect it folded into offset:128.
+define <4 x float> @global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add.immoffset = add i64 %sbase.as.int, 128
+ %add = add i64 %zext.offset, %add.immoffset
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Uniformity edge cases
+;;------------------------------------------------------------------------------
+
+;; Base and offset are named as scalars (%sbase, %soffset) but are plain non-inreg arguments here, so the checks show them in VGPRs (v[0:1], v2) with the saddr operand 'off'.
+define <4 x float> @global_load_i8_zext_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Zero-extended 32-bit offset added to a 64-bit base, then a constant -24 byte GEP that the checks show as offset:-24. NOTE(review): described as scalar operands, but the checks use v0-v2 -- confirm whether inreg arguments were intended.
+define <4 x float> @global_load_i8_zext_uniform_offset_immoffset(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-24
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-24
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Address formed via ptrtoint/add/inttoptr with the zero-extended 32-bit offset as the LHS of the add. NOTE(review): described as uniform operands, but the checks use v0-v2 -- confirm whether inreg arguments were intended.
+define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add = add i64 %zext.offset, %sbase.as.int
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Both components uniform, zext forced to LHS of addressing expression, with immediate offset
+define <4 x float> @global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) %sbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add = add i64 %zext.offset, %sbase.as.int
+ %add.immoffset = add i64 %add, 128
+ %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Divergent 64-bit base plus a zero-extended 32-bit offset, loaded with
+;; llvm.amdgcn.global.load.b128 using scope metadata !3.
+;; NOTE(review): %soffset is not marked inreg, and every check below reads it
+;; from v2 (a VGPR), so the "sgpr32" in the name does not describe an SGPR
+;; operand here -- confirm whether inreg was intended.
+;; NOTE(review): !3 is defined outside this hunk; the checks below show it
+;; selecting "glc", "glc dlc", "sc0 sc1" or "scope:SCOPE_SYS" depending on the
+;; target.
+define <4 x float> @global_load_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ ; Zero-extend the 32-bit offset to 64 bits.
+ %zext.offset = zext i32 %soffset to i64
+ ; Byte-granular (i8) address computation: %vbase + %zext.offset.
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
+ ; 128-bit load through the intrinsic under test; result is <4 x i32>.
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ ; Reinterpret the loaded bits as <4 x float> for the return value.
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; divergent 64-bit base, 32-bit offset (no inreg, so it is read from a VGPR in the checks below), with imm offset
+define <4 x float> @global_load_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Natural addressing shifts with restricted range
+;;------------------------------------------------------------------------------
+
+;; Cannot push the shift into 32-bits, and cannot match.
+;; Loads a 32-bit index from %voffset.ptr, zero-extends it to 64 bits, uses it
+;; as a float-element index off %sbase, and performs a 128-bit global load via
+;; llvm.amdgcn.global.load.b128 at the resulting address.
+;; NOTE(review): %sbase is not marked inreg here, and the checks below add the
+;; scaled index to v[0:1]; confirm a VGPR base is what this test intends.
+define <4 x float> @global_load_f32_natural_addressing(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ ; The index comes from memory, so it is not a compile-time constant.
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ ; Zero-extend the 32-bit index to 64 bits; the GEP below then scales it by
+ ; the float element size (the checks above show a 64-bit shift left by 2).
+ %zext.offset = zext i32 %voffset to i64
+ %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ ; NOTE(review): !1 is defined outside this chunk; presumably it selects the
+ ; scope/cache-policy bits seen on the final load in the checks -- confirm.
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !1)
+ ; Reinterpret the four loaded dwords as floats to match the return type.
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Cannot push the shift into 32-bits, with an immediate offset.
+define <4 x float> @global_load_f32_natural_addressing_immoffset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:128 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; The !range on the loaded offset is restrictive enough that the shift scaling it for the float GEP can be done in 32 bits.
+define <4 x float> @global_load_f32_zext_vgpr_range(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+ %zext.offset = zext i32 %voffset to i64
+ %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Range is sufficiently restricted to push the shift into 32 bits, with an immediate offset on the load.
+define <4 x float> @global_load_f32_zext_vgpr_range_imm_offset(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:400
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v2
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:400
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; The !range bound is 1 beyond the limit where the shift of the zero-extended offset can be narrowed to 32 bits, so a 64-bit shift (or shift-add) is expected.
+define <4 x float> @global_load_f32_zext_vgpr_range_too_large(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_zext_vgpr_range_too_large:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{}
+ %zext.offset = zext i32 %voffset to i64
+ %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; or-with-constant as add
+;;------------------------------------------------------------------------------
+
+;; Check add-as-or with split 64-bit or.
+define <4 x float> @global_load_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX1012-SDAG-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_or_b32_e32 v0, 16, v1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1012-ISEL-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_bitop2_b32 v2, 16, v1 bitop3:0x54
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 16, v1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.idx = zext i32 %idx to i64
+ %or = or i64 %zext.idx, 16
+ %addr = inttoptr i64 %or to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+; Loads 128 bits from a global (addrspace 1) address built as
+; `zext(%idx) | 4160` (4160 == 0x1040) via inttoptr, using
+; llvm.amdgcn.global.load.b128 with scope metadata !3, and returns the result
+; bitcast to <4 x float>. %sbase is not referenced by the body.
+;
+; On every checked target the expected code applies the constant with a VALU
+; v_or_b32 and issues the load with "off" and no immediate offset, i.e. the OR
+; is not folded into the instruction offset field (presumably because %idx may
+; already have those bits set, so the OR is not equivalent to an add -- confirm).
+; !3 is defined outside this function; the checks below show it selecting
+; "scope:SCOPE_SYS" on the GFX12-family runs, "sc0 sc1" on the GFX9.4-family
+; runs, "glc dlc" on the GFX10 runs and "glc" on the remaining GFX9/GFX11 runs.
+define <4 x float> @global_load_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX1012-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: v_or_b32_e32 v2, 0x1040, v1
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-SDAG-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[1:2], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1012-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[1:2], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT: v_or_b32_e32 v2, 0x1040, v1
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[2:3], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-GENERIC-ISEL-NEXT: v_or_b32_e32 v1, 0x1040, v1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[1:2], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.idx = zext i32 %idx to i64
+ ; 4160 is 0x1040, the literal that appears in the v_or_b32 checks above.
+ %or = or i64 %zext.idx, 4160
+ %addr = inttoptr i64 %or to ptr addrspace(1)
+ ; The second operand is the scope metadata; !3 is declared elsewhere in the file.
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Full 64-bit scalar add.
+;;------------------------------------------------------------------------------
+define <4 x float> @global_addr_64bit_lsr_iv(ptr addrspace(1) %arg) {
+; GFX9-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX9-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX9-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX906-SDAG: ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX908-SDAG: ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX908-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX908-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX90A-SDAG: ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX90A-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX90A-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX9-4-GENERIC-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX9-4-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX9-4-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX9-4-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX942-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX950-SDAG: ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX950-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX950-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX950-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX950-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX10-1-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX10-1-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX1012-SDAG: ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX10-3-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX10-3-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s5, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX11-GENERIC-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX11-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX11-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX11-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX11-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX1250-SDAG: ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX1250-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff
+; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX12-GENERIC-SDAG-NEXT: .LBB60_1: ; %bb3
+; GFX12-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff
+; GFX12-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB60_1
+; GFX12-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-GENERIC-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX9-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX9-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX9-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX906-ISEL: ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX906-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX908-ISEL: ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX908-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX908-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX908-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX908-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX908-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX90A-ISEL: ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX90A-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX90A-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX90A-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX90A-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX90A-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX9-4-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX9-4-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-4-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX9-4-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX942-ISEL: ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX942-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX950-ISEL: ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX950-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX950-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX950-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX950-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX950-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-1-GENERIC-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX10-1-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-1-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX10-1-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX1012-ISEL: ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1012-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-3-GENERIC-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX10-3-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-3-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX10-3-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX11-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX11-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX11-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX11-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX1250-ISEL: ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: .LBB60_1: ; %bb3
+; GFX12-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX12-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX12-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB60_1
+; GFX12-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret <4 x float> %i6
+
+bb3: ; preds = %bb3, %bb
+ %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]
+ %i4 = zext i32 %i to i64
+ %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !0)
+ %i6 = bitcast <4 x i32> %load to <4 x float>
+ %i8 = add nuw nsw i32 %i, 1
+ %i9 = icmp eq i32 %i8, 256
+ br i1 %i9, label %bb2, label %bb3
+}
+
+;; Make sure we only have a single zero vaddr initialization.
+
+define <4 x float> @global_addr_64bit_lsr_iv_multiload(ptr addrspace(1) %arg, ptr addrspace(1) %arg.1, i32 %x) {
+; GFX9-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX9-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX9-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX906-SDAG: ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX908-SDAG: ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX908-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX908-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX90A-SDAG: ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX90A-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX90A-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX9-4-GENERIC-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX9-4-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX9-4-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX9-4-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX942-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX950-SDAG: ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX950-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX950-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX950-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX950-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX10-1-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX10-1-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1012-SDAG: ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX10-3-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX10-3-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s5, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX11-GENERIC-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX11-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_add_i32 s0, s0, 1
+; GFX11-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s0, 0xff
+; GFX11-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX11-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1250-SDAG: ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX1250-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff
+; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s0, -1
+; GFX12-GENERIC-SDAG-NEXT: .LBB61_1: ; %bb5
+; GFX12-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_cmp_eq_u32 s0, 0xff
+; GFX12-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB61_1
+; GFX12-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-GENERIC-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX9-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX9-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX9-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX906-ISEL: ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX906-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX908-ISEL: ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX908-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX908-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX908-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX908-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX908-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX90A-ISEL: ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX90A-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX90A-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX90A-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX90A-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX90A-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX9-4-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX9-4-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-4-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX9-4-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX942-ISEL: ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX942-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX950-ISEL: ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v3, 0xff
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX950-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX950-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT: v_add_u32_e32 v2, 1, v2
+; GFX950-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX950-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX950-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-1-GENERIC-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX10-1-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-1-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX10-1-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1012-ISEL: ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1012-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-3-GENERIC-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX10-3-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX10-3-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX10-3-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX11-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX11-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX11-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX11-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX1250-ISEL: ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_addr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s0, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: .LBB61_1: ; %bb5
+; GFX12-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
+; GFX12-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v2
+; GFX12-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB61_1
+; GFX12-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+bb:
+ br label %bb5
+
+bb2:
+ %y = icmp eq i32 %x, 0
+ br i1 %y, label %bb3, label %bb4
+
+bb3:
+ ret <4 x float> %i6
+
+bb4:
+ ret <4 x float> %i6.1
+
+bb5:
+ %i = phi i32 [ 0, %bb ], [ %i8, %bb5 ]
+ %i4 = zext i32 %i to i64
+ %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !1)
+ %i6 = bitcast <4 x i32> %load to <4 x float>
+ %i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4
+ %load.1 = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !2)
+ %i6.1 = bitcast <4 x i32> %load to <4 x float>
+ %i8 = add nuw nsw i32 %i, 1
+ %i9 = icmp eq i32 %i8, 256
+ br i1 %i9, label %bb2, label %bb5
+}
+;;==============================================================================
+;; } end signed offset addressing modes
+;;==============================================================================
+
+;;==============================================================================
+;; Various saddr addressing modes (derived from global-saddr-load.ll) {
+;;==============================================================================
+
+;;------------------------------------------------------------------------------
+;; No vgpr offset, constants
+;;------------------------------------------------------------------------------
+
+;; SGPR base only: the inreg %sbase pointer is used as the load address as-is (no GEP), and the <4 x i32> result is bitcast to <4 x float>. Metadata !3 is defined outside this excerpt.
+define <4 x float> @global_load_saddr_i8_offset_0(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %sbase, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum gfx9 immediate offset: loads from inreg %sbase + 4095 bytes (i8 GEP). The GFX9/GFX11/GFX12 checks fold it into offset:4095; the GFX10 checks split it into a 0x800 VGPR offset plus offset:2047.
+define <4 x float> @global_load_saddr_i8_offset_4095(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4095:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4095:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum gfx9 immediate offset + 1
+define <4 x float> @global_load_saddr_i8_offset_4096(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4096
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base plus a constant byte offset of 4097: the maximum gfx9 immediate offset (4095) + 2
+define <4 x float> @global_load_saddr_i8_offset_4097(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_4097:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:1 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:1 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_4097:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4097 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4097
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base plus a constant byte offset of -4096: the maximum negative gfx9 immediate offset
+define <4 x float> @global_load_saddr_i8_offset_neg4096(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff000
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff000
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff000
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx9 immediate offset (-4096) - 1
+define <4 x float> @global_load_saddr_i8_offset_neg4097(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX9-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX908-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX90A-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, 0xffffefff
+; GFX9-4-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xffffefff
+; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0xffffefff
+; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xffffefff
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xffffefff
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx9 immediate offset - 2 (i8 GEP of -4098 from %sbase, loaded via llvm.amdgcn.global.load.b128 with metadata operand !1)
+define <4 x float> @global_load_saddr_i8_offset_neg4098(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX9-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX908-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX90A-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, 0xffffeffe
+; GFX9-4-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xffffeffe
+; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0xffffeffe
+; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff000, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-2 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xffffeffe
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xffffeffe
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4098 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum gfx10 immediate offset + 1 (2048 is not folded into the immediate offset field in the GFX10 checks below; it is materialized as 0x800 in v0)
+define <4 x float> @global_load_saddr_i8_offset_2048(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2048
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum gfx10 immediate offset + 2 (2049; the GFX10 checks split it into 0x800 in v0 plus offset:1)
+define <4 x float> @global_load_saddr_i8_offset_2049(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2049:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2049 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2049 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:1 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2049:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2049 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2049
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with offset 2050, i.e. maximum gfx10 immediate offset (2047) + 3; the GFX10 checks split it into 0x800 in v0 plus offset:2
+define <4 x float> @global_load_saddr_i8_offset_2050(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_2050:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2050
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2050
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x800
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_2050:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2050
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 2050
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx10 immediate offset (-2048); every checked target folds it directly into the instruction's offset field
+define <4 x float> @global_load_saddr_i8_offset_neg2048(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx10 immediate offset - 1
+define <4 x float> @global_load_saddr_i8_offset_neg2049(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7ff
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7ff
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7ff
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; SGPR base with maximum negative gfx10 immediate offset - 2
+define <4 x float> @global_load_saddr_i8_offset_neg2050(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xfffff800, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2050 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2050 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7fe
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7fe
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfffff7fe
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2050 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff800
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff800
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff800
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff800
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff800
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff800
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 8388607
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX9-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX908-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX90A-SDAG-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, 0xff800000
+; GFX9-4-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xff800000
+; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0xff800000
+; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xff800000, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0xff800000, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0xff800000, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0xff800000, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xff800000
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xff800000
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608 ; byte offset -(2^23) from the inreg (SGPR) base; the gfx12/gfx1250 checks above fold it into the instruction offset, the older targets add 0xff800000 (low) / -1 (high) to the base first
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1) ; metadata !1 is defined outside this function; the gfx12-generic checks above show scope:SCOPE_SE for it
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0xFFFFFFFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff800
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff800
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff800
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0xff800000, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:8388607 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0xff800000
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff800
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff800
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff800
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0xfffff000
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, -1
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0xff800000
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967295 ; byte offset 0xFFFFFFFF (2^32 - 1) from the inreg (SGPR) base; most checks above split it into a register part plus an instruction offset (e.g. 0xfffff000 + 4095), while gfx1250 GlobalISel adds it to the base outright
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2) ; metadata !2 is defined outside this function; the gfx12 and gfx1250 checks above show scope:SCOPE_DEV for it
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100000000(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_add_i32 s17, s17, 1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_add_i32 s17, s17, 1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_add_i32 s17, s17, 1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_add_i32 s17, s17, 1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_i32 s1, s1, 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_add_i32 s1, s1, 1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_add_i32 s1, s1, 1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_i32 s17, s17, 1
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: s_add_i32 s17, s17, 1
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_i32 s17, s17, 1
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: s_add_i32 s1, s1, 1
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, 1
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: s_add_co_i32 s1, s1, 1
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000000:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_u32 s0, s0, 0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967296
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100000001(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX942-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:1
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 1
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000001:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_u32 s0, s0, 1
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294967297 ; byte offset 0x100000001 = 2^32 + 1
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0) ; !0 is defined outside this function; every expected load above has no cache/scope modifier
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100000FFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX9-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX908-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX90A-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, 0xfff
+; GFX9-4-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0xfff
+; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0xfff
+; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xfff
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0xfff
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 0xfff
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0xfff
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0xfff
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0xfff
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100000FFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_u32 s0, s0, 0xfff
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971391 ; byte offset 0x100000FFF = 2^32 + 4095
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1) ; !1 is defined outside this function; expected loads above: sc0 on gfx9-4-generic/gfx942/gfx950, glc on gfx10.x/gfx11, scope:SCOPE_SE on gfx12-generic, no modifier on the other gfx9 targets and gfx1250
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_0x100001000(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX9-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX908-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX90A-SDAG-NEXT: s_addc_u32 s5, s17, 1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, 0x1000
+; GFX9-4-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x1000
+; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_add_u32 s0, s0, 0x1000
+; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, 1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0x1000, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x1000, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, 1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0x1000, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4096 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0x1000
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0x1000
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 0x1000
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0x1000
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, 1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0x1000
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, 1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0x1000
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_0x100001000:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_u32 s0, s0, 0x1000
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4294971392
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+; Loads 128 bits with llvm.amdgcn.global.load.b128 from an inreg base pointer
+; displaced by the constant byte offset -4294967295 (-0xFFFFFFFF), then returns
+; the result bitcast to <4 x float>.
+; In the expected output below, the -SDAG check groups materialize the address
+; with a vector add / add-with-carry pair and keep the remainder of the constant
+; in the load's immediate offset field (-4095, -2047 or -8388607 depending on
+; the target), while the -ISEL check groups apply the whole constant with a
+; scalar add / add-with-carry pair and use no immediate offset.
+; NOTE(review): metadata !3 is defined outside this function; the GFX12 and
+; GFX1250 expectations carry SCOPE_SYS for it -- confirm against its definition.
+define <4 x float> @global_load_saddr_i8_offset_neg0xFFFFFFFF(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s16
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s16
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, s16
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s16
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX942-SDAG-NEXT: s_nop 0
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-SDAG-NEXT: s_nop 0
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-4095 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0x800, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800000, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800000, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 1
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 1
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 1
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_u32 s0, s0, 1
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967295
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_neg0x100000000(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_add_i32 s17, s17, -1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_add_i32 s17, s17, -1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_add_i32 s17, s17, -1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_add_i32 s17, s17, -1
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_i32 s1, s1, -1
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_add_i32 s1, s1, -1
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_add_i32 s1, s1, -1
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_i32 s17, s17, -1
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: s_add_i32 s17, s17, -1
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_i32 s17, s17, -1
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: s_add_i32 s1, s1, -1
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, -1
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: s_add_co_i32 s1, s1, -1
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, 0
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, 0
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, 0
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000000:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_u32 s0, s0, 0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967296
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Constant byte offset of -4294967297 (-0x100000001) applied to an inreg
+;; (SGPR) base pointer. In the checks below, the *-SDAG blocks add -1 to the
+;; high dword (0 to the low dword) with VALU carry adds and fold the remaining
+;; -1 into the load's immediate offset, while the *-ISEL blocks add the whole
+;; constant with scalar adds (-1 low, -2 high with carry) and use no immediate.
+;; NOTE(review): metadata !1 is defined outside this function; confirm there
+;; what it selects (the cache-policy modifiers in the checks depend on it).
+define <4 x float> @global_load_saddr_i8_offset_neg0x100000001(ptr addrspace(1) inreg %sbase) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s16
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX942-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, -1, s17, s4
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, 0, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s17, s4
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, -1
+; GFX9-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -2
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_add_u32 s4, s16, -1
+; GFX906-ISEL-NEXT: s_addc_u32 s5, s17, -2
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_add_u32 s4, s16, -1
+; GFX908-ISEL-NEXT: s_addc_u32 s5, s17, -2
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_add_u32 s4, s16, -1
+; GFX90A-ISEL-NEXT: s_addc_u32 s5, s17, -2
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, -1
+; GFX9-4-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -2
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_add_u32 s0, s0, -1
+; GFX942-ISEL-NEXT: s_addc_u32 s1, s1, -2
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_add_u32 s0, s0, -1
+; GFX950-ISEL-NEXT: s_addc_u32 s1, s1, -2
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, -1
+; GFX10-1-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -2
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_add_u32 s4, s16, -1
+; GFX1012-ISEL-NEXT: s_addc_u32 s5, s17, -2
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_add_u32 s4, s16, -1
+; GFX10-3-GENERIC-ISEL-NEXT: s_addc_u32 s5, s17, -2
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_add_u32 s0, s0, -1
+; GFX11-GENERIC-ISEL-NEXT: s_addc_u32 s1, s1, -2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, -1
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_neg0x100000001:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_u32 s0, s0, -1
+; GFX12-GENERIC-ISEL-NEXT: s_add_co_ci_u32 s1, s1, -2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4294967297
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Basic addressing patterns
+;;------------------------------------------------------------------------------
+
+;; Basic pattern, no immediate offset: the address is the inreg (SGPR) base
+;; plus a 32-bit offset (v0 in the checks) that is zero-extended to i64.
+;; Every check block below expects a single global load in the
+;; `v0, s[N:N+1]` form with no separate address arithmetic; the cache-policy
+;; modifiers on that load (glc, sc1, glc dlc, scope:SCOPE_DEV) vary by target.
+;; NOTE(review): metadata !2 is defined outside this function; confirm there
+;; what it selects.
+define <4 x float> @global_load_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum positive immediate offset on gfx9 (4095); the gfx10 SDAG checks below instead add 0x800 and use offset:2047.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; One past the maximum positive immediate offset on gfx9 (4095 + 1 = 4096); the gfx9 checks expect an explicit add and no offset: on the load.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4096(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4096
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4096
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum negative offset on gfx9
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4096(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-4096
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-4096 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4096 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4096
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; One byte below gfx9's most negative immediate offset (-4096); in the checks below only gfx12-generic and gfx1250 still fold -4097 into the load, the other targets add it with VALU ops
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg4097(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:-1 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffefff, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffefff, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-4097 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -4097
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Largest positive immediate offset on gfx10 (2047); every target checked below folds it into the load's offset field
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2047 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2047 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2047 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; One byte past the maximum positive immediate offset on gfx10 (2047 + 1 = 2048)
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_2048(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:2048
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:2048
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:2048
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Most negative immediate offset on gfx10 (-2048)
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2048 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2048 glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2048 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; One below the most negative immediate offset on gfx10 (-2048 - 1)
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_neg2049(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:-1 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-2049 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-2049 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff7ff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-2049 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2049
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Maximum positive offset on gfx12.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0x7FFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:8388607 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 8388607
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Minimum negative offset on gfx12 (-8388608, i.e. -0x800000).
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF(ptr addrspace(1) inreg %sbase, i32 %voffset) { %zext.offset = zext i32 %voffset to i64
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 1
+; GFX9-4-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX950-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-SDAG-NEXT: s_nop 1
+; GFX950-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s0, s0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_0xFFFFFF:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-8388608
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -8388608
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+
+;; Maximum positive offset on gfx9 (4095). The constant GEP is applied to the base before the variable offset, so the immediate needs to be moved lower (past the zext'd offset) to land in the instruction's offset field.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_offset_4095_gep_order(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s4, s17, 0, s4
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, s4, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, 0, s4
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:4095
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:4095 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v2, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v2, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:4095 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64 ; widen the 32-bit offset argument to a 64-bit byte offset
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 4095 ; constant byte offset applied to the inreg base first
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %zext.offset ; variable offset applied last, on top of the constant
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1) ; NOTE(review): !1 is defined outside this view; presumably selects the memory scope - confirm
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded 128 bits as the <4 x float> return type
+ ret <4 x float> %cast.load
+}
+
+;; Pointer addressing done in integers (ptrtoint + add + inttoptr instead of a GEP); the inreg base + zero-extended offset addressing form is still expected in the selected load.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64 ; widen the 32-bit offset argument to 64 bits
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 ; drop to integer arithmetic instead of using a GEP
+ %add = add i64 %sbase.as.int, %zext.offset ; base on the LHS, zero-extended offset on the RHS
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1) ; rebuild the global pointer from the integer sum
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !2) ; NOTE(review): !2 is defined outside this view; presumably selects the memory scope - confirm
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded 128 bits as the <4 x float> return type
+ ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of the integer add in a ptrtoint/add/inttoptr addressing expression; the inreg base + zero-extended offset form is expected in the selected load regardless of operand order.
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64 ; widen the 32-bit offset argument to 64 bits
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 ; drop to integer arithmetic instead of using a GEP
+ %add = add i64 %zext.offset, %sbase.as.int ; operands commuted: zero-extended offset on the LHS, base on the RHS
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1) ; rebuild the global pointer from the integer sum
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !3) ; NOTE(review): !3 is defined outside this view; presumably selects the memory scope - confirm
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded 128 bits as the <4 x float> return type
+ ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of addressing expression, with immediate offset
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add = add i64 %zext.offset, %sbase.as.int
+ %add.immoffset = add i64 %add, 128
+ %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; zext forced to LHS of addressing expression, with immediate offset in non-canonical position
+define <4 x float> @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr addrspace(1) inreg %sbase, i32 %voffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64
+ %add.immoffset = add i64 %sbase.as.int, 128
+ %add = add i64 %zext.offset, %add.immoffset
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Uniformity edge cases
+;;------------------------------------------------------------------------------
+
+;; Both 64-bit base and 32-bit offset are scalar
+define <4 x float> @global_load_saddr_i8_zext_uniform_offset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Both the 64-bit base and the 32-bit offset are scalar (inreg); a constant -24 byte offset is applied on top and is expected to appear as offset:-24 on the load.
+define <4 x float> @global_load_saddr_i8_zext_uniform_offset_immoffset(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:-24 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:-24 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:-24 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64 ; widen the uniform 32-bit offset to 64 bits
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; byte-granular base + offset. NOTE(review): the GFX1250-ISEL checks do this add with s_add_co_u32/s_add_co_ci_u32 and use a zero VGPR offset, while the other prefixes pass the offset in v0 -- confirm that difference is intended
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24 ; constant part; every check block above expects it as offset:-24 on the load
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3) ; !3 is defined outside this chunk; the checks show glc (GFX9 prefixes, GFX11), sc0 sc1 (GFX9-4-GENERIC/GFX942/GFX950), glc dlc (GFX10 prefixes) and scope SCOPE_SYS (GFX1250, GFX12-GENERIC)
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the 128-bit result to match the declared return type
+ ret <4 x float> %cast.load
+}
+
+;; Both components uniform (inreg); the address is built with ptrtoint/add/inttoptr so the zext is forced to the LHS of the add.
+define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17]
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64 ; widen the uniform 32-bit offset to 64 bits
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 ; address math is done on integers rather than with a GEP
+ %add = add i64 %zext.offset, %sbase.as.int ; zext operand placed first (LHS), base second. NOTE(review): the GFX1250-ISEL checks do this add with s_add_co_u32/s_add_co_ci_u32 and use a zero VGPR offset, while the other prefixes pass the offset in v0 -- confirm that difference is intended
+ %dirty.gep = inttoptr i64 %add to ptr addrspace(1) ; turn the sum back into a global pointer for the load
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !0) ; !0 is defined outside this chunk; none of the check blocks above carry a cache-policy or scope modifier on the load
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the 128-bit result to match the declared return type
+ ret <4 x float> %cast.load
+}
+
+;; Both components uniform (inreg); ptrtoint/add/inttoptr addressing with the zext forced to the LHS of the add, plus a constant 128 expected to appear as offset:128 on the load.
+define <4 x float> @global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr addrspace(1) inreg %sbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s18
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: s_add_co_u32 s0, s0, s2
+; GFX1250-ISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64 ; widen the uniform 32-bit offset to 64 bits
+ %sbase.as.int = ptrtoint ptr addrspace(1) %sbase to i64 ; address math is done on integers rather than with a GEP
+ %add = add i64 %zext.offset, %sbase.as.int ; zext operand placed first (LHS), base second. NOTE(review): the GFX1250-ISEL checks do this add with s_add_co_u32/s_add_co_ci_u32 and use a zero VGPR offset, while the other prefixes pass the offset in v0 -- confirm that difference is intended
+ %add.immoffset = add i64 %add, 128 ; constant part; every check block above expects it as offset:128 on the load
+ %dirty.gep = inttoptr i64 %add.immoffset to ptr addrspace(1) ; turn the sum back into a global pointer for the load
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %dirty.gep, metadata !1) ; !1 is defined outside this chunk; the checks show no modifier (GFX9 prefixes), sc0 (GFX9-4-GENERIC/GFX942/GFX950), glc (GFX10 prefixes, GFX11) and scope SCOPE_SE (GFX12-GENERIC). NOTE(review): the GFX1250 checks carry no scope modifier here -- confirm that is intended
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the 128-bit result to match the declared return type
+ ret <4 x float> %cast.load
+}
+
+;; Divergent 64-bit base (VGPR pair) plus a uniform 32-bit scalar offset (SGPR), zero-extended to 64 bits.
+define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32(ptr addrspace(1) %vbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep0, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Divergent 64-bit base (VGPR pair) plus a uniform 32-bit scalar offset (SGPR), with an additional immediate offset of 4095.
+define <4 x float> @global_load_saddr_i8_vgpr64_sgpr32_offset_4095(ptr addrspace(1) %vbase, i32 inreg %soffset) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s16
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2047 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_mov_b32 s1, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4095 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s17, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_vgpr64_sgpr32_offset_4095:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s1, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:4095 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %soffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %vbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 4095
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Natural addressing shifts with restricted range
+;;------------------------------------------------------------------------------
+
+;; The shift cannot be narrowed to 32 bits, so the saddr form cannot be matched.
+define <4 x float> @global_load_saddr_f32_natural_addressing(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Byte-addressed (i8 GEP) zext offset plus an immediate: no shift is involved, and the checks show the saddr form with offset:128.
+define <4 x float> @global_load_saddr_f32_natural_addressing_immoffset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:128 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:128 glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_f32_natural_addressing_immoffset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr ; 32-bit offset read from memory
+ %zext.offset = zext i32 %voffset to i64 ; widen the offset to 64 bits before indexing
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset ; i8 GEP: the offset is in bytes, so no scaling is applied
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 128 ; constant 128 bytes, seen as the immediate offset in the checks
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !1) ; !1 is defined outside this excerpt; presumably the scope operand - confirm
+ %cast.load = bitcast <4 x i32> %load to <4 x float> ; reinterpret the loaded 128 bits as the <4 x float> return type
+ ret <4 x float> %cast.load
+}
+
+;; Range is sufficiently restricted to push the shift into 32-bits.
+define <4 x float> @global_load_f32_saddr_zext_vgpr_range(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+ %zext.offset = zext i32 %voffset to i64
+ %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; Same as above (shift done in 32 bits thanks to the !range), plus an immediate offset on the address.
+define <4 x float> @global_load_f32_saddr_zext_vgpr_range_imm_offset(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] offset:400 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[16:17] offset:400 glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_imm_offset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v0, s[0:1] offset:400 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !4, !noundef !{}
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds float, ptr addrspace(1) %gep0, i64 100
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep1, metadata !3)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;; The range is 1 beyond the limit at which the shift can still be moved into 32 bits.
+define <4 x float> @global_load_f32_saddr_zext_vgpr_range_too_large(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v2, s17
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, s16, v0
+; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s17, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s17, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-SDAG-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_f32_saddr_zext_vgpr_range_too_large:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr, !range !5, !noundef !{}
+ %zext.offset = zext i32 %voffset to i64
+ %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %gep, metadata !0)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; or-with-constant as add
+;;------------------------------------------------------------------------------
+
+;; Check add-as-or with split 64-bit or: the address is inttoptr((zext i32 %idx) | 16) and %sbase is unused; the checks show the or applied to the low register only, the high register set to 0, and no immediate offset on the load.
+define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) inreg %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_16:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 16, v0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.idx = zext i32 %idx to i64
+ %or = or i64 %zext.idx, 16
+ %addr = inttoptr i64 %or to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !1)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+define <4 x float> @global_load_saddr_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) inreg %sbase, i32 %idx) {
+; GFX9-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX90A-ISEL: ; %bb.0:
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_load_saddr_i8_offset_or_i64_imm_offset_4160:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.idx = zext i32 %idx to i64
+ %or = or i64 %zext.idx, 4160
+ %addr = inttoptr i64 %or to ptr addrspace(1)
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+ %cast.load = bitcast <4 x i32> %load to <4 x float>
+ ret <4 x float> %cast.load
+}
+
+;;------------------------------------------------------------------------------
+;; Full 64-bit scalar add.
+;;------------------------------------------------------------------------------
+define <4 x float> @global_saddr_64bit_lsr_iv(ptr addrspace(1) inreg %arg) {
+; GFX9-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX9-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX9-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX9-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX906-SDAG: ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX908-SDAG: ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX908-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX908-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX908-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX90A-SDAG: ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX90A-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX90A-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX90A-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-4-GENERIC-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX9-4-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX9-4-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX9-4-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX9-4-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX942-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX942-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX942-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX950-SDAG: ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX950-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX950-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX950-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX950-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX950-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX10-1-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX10-1-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX10-1-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1012-SDAG: ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX1012-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX10-3-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX10-3-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX10-3-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] glc dlc
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-GENERIC-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX11-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX11-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX11-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX11-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX11-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] glc
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1250-SDAG: ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX1250-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff
+; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX12-GENERIC-SDAG-NEXT: .LBB114_1: ; %bb3
+; GFX12-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff
+; GFX12-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB114_1
+; GFX12-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1] scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX9-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX9-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX906-ISEL: ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX908-ISEL: ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX908-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX908-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX908-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX908-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX90A-ISEL: ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX90A-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX90A-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX90A-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX90A-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX90A-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX9-4-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-4-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX9-4-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX942-ISEL: ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX950-ISEL: ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX950-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX950-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX950-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX950-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX10-1-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX10-1-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1012-ISEL: ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX10-3-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX10-3-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc dlc
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX11-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX11-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX11-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX11-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off glc
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX1250-ISEL: ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT: .LBB114_1: ; %bb3
+; GFX12-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX12-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX12-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB114_1
+; GFX12-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+bb: ; entry block: only branches into the counting loop at %bb3
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret <4 x float> %i6 ; %i6 comes from %bb3, so this is the value loaded on the final iteration
+
+bb3: ; preds = %bb3, %bb
+ %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ] ; loop counter: 0 from the entry block, %i8 on the back edge
+ %i4 = zext i32 %i to i64 ; widen the 32-bit counter to a 64-bit element index
+ %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4 ; inreg base %arg plus %i4 float-sized elements
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !3) ; !3 is defined outside this function; the gfx12-family assertions above expect a SCOPE_SYS load for it
+ %i6 = bitcast <4 x i32> %load to <4 x float> ; reinterpret the 128 loaded bits as the <4 x float> return type
+ %i8 = add nuw nsw i32 %i, 1 ; next counter value
+ %i9 = icmp eq i32 %i8, 256 ; true once the incremented counter reaches 256
+ br i1 %i9, label %bb2, label %bb3
+}
+
+;; Make sure we only have a single zero vaddr initialization.
+
+define <4 x float> @global_saddr_64bit_lsr_iv_multiload(ptr addrspace(1) inreg %arg, ptr addrspace(1) inreg %arg.1, i32 %x) {
+; GFX9-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX9-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX9-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX9-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX9-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX9-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX9-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX9-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX906-SDAG: ; %bb.0: ; %bb
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX906-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX906-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX906-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX906-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX906-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX906-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX906-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX906-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX906-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX908-SDAG: ; %bb.0: ; %bb
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX908-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX908-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX908-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX908-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX908-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX908-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX908-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX908-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX908-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX90A-SDAG: ; %bb.0: ; %bb
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX90A-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX90A-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX90A-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX90A-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX90A-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX90A-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX90A-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX90A-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX90A-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-4-GENERIC-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX9-4-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX9-4-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX9-4-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX9-4-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX9-4-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX9-4-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX942-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX942-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX942-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX942-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX942-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX942-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX942-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX950-SDAG: ; %bb.0: ; %bb
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX950-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX950-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX950-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX950-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX950-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX950-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX10-1-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-1-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-1-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX10-1-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-1-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX10-1-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1012-SDAG: ; %bb.0: ; %bb
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX1012-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX1012-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX1012-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX1012-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX1012-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1012-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX1012-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX1012-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX1012-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX10-3-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_i32 s4, s4, 1
+; GFX10-3-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s4, 0xff
+; GFX10-3-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX10-3-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
+; GFX10-3-GENERIC-SDAG-NEXT: s_add_u32 s4, s16, s4
+; GFX10-3-GENERIC-SDAG-NEXT: s_addc_u32 s5, s17, s5
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-GENERIC-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX11-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_add_i32 s2, s2, 1
+; GFX11-GENERIC-SDAG-NEXT: s_cmpk_eq_i32 s2, 0xff
+; GFX11-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX11-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-SDAG-NEXT: s_add_u32 s0, s0, s2
+; GFX11-GENERIC-SDAG-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1250-SDAG: ; %bb.0: ; %bb
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX1250-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff
+; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX1250-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %bb
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX12-GENERIC-SDAG-NEXT: .LBB115_1: ; %bb5
+; GFX12-GENERIC-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_cmp_eq_u32 s2, 0xff
+; GFX12-GENERIC-SDAG-NEXT: s_cbranch_scc0 .LBB115_1
+; GFX12-GENERIC-SDAG-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-SDAG-NEXT: s_mov_b32 s3, 0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-GENERIC-SDAG-NEXT: global_load_b128 v[0:3], v0, s[0:1]
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-GENERIC-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX9-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GENERIC-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX9-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX9-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX906-ISEL: ; %bb.0: ; %bb
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX906-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX906-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX906-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX906-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX906-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX906-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX908-ISEL: ; %bb.0: ; %bb
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX908-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX908-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX908-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX908-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX908-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX908-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX908-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX90A-ISEL: ; %bb.0: ; %bb
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX90A-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX90A-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX90A-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX90A-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX90A-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX90A-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX90A-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[16:17], s[16:17] op_sel:[0,1]
+; GFX90A-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX90A-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
+; GFX90A-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX90A-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-4-GENERIC-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX9-4-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX9-4-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX9-4-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX9-4-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-4-GENERIC-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX942-ISEL: ; %bb.0: ; %bb
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX942-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX942-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX942-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX942-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX942-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX950-ISEL: ; %bb.0: ; %bb
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX950-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX950-ISEL-NEXT: v_add_u32_e32 v0, 1, v0
+; GFX950-ISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX950-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX950-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX950-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX950-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-1-GENERIC-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX10-1-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-1-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX10-1-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-1-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1012-ISEL: ; %bb.0: ; %bb
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1012-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX1012-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1012-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX1012-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1012-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX1012-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX1012-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s4, -1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-3-GENERIC-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX10-3-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX10-3-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX10-3-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s16
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s17
+; GFX10-3-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GENERIC-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX11-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX11-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX11-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX11-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX1250-ISEL: ; %bb.0: ; %bb
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX1250-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX1250-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX1250-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX1250-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
+; GFX1250-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_saddr_64bit_lsr_iv_multiload:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %bb
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s2, -1
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-GENERIC-ISEL-NEXT: .LBB115_1: ; %bb5
+; GFX12-GENERIC-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
+; GFX12-GENERIC-ISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0xff, v0
+; GFX12-GENERIC-ISEL-NEXT: s_cbranch_vccz .LBB115_1
+; GFX12-GENERIC-ISEL-NEXT: ; %bb.2: ; %bb2
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_load_b128 v[0:3], v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+bb:
+ br label %bb5
+
+bb2:
+ %y = icmp eq i32 %x, 0
+ br i1 %y, label %bb3, label %bb4
+
+bb3:
+ ret <4 x float> %i6
+
+bb4:
+ ret <4 x float> %i6.1
+
+bb5:
+ %i = phi i32 [ 0, %bb ], [ %i8, %bb5 ]
+ %i4 = zext i32 %i to i64
+ %i5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %i4
+ %load = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !0)
+ %i6 = bitcast <4 x i32> %load to <4 x float>
+ %i5.1 = getelementptr inbounds float, ptr addrspace(1) %arg.1, i64 %i4 ; NOTE(review): %i5.1 is never used; the second call below loads from %i5.
+ %load.1 = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %i5, metadata !1) ; NOTE(review): address is %i5, not %i5.1, and %load.1 has no users - looks like a copy-paste slip; confirm intent.
+ %i6.1 = bitcast <4 x i32> %load to <4 x float> ; NOTE(review): casts %load, not %load.1, so bb3 and bb4 return the same value; if this is corrected, regenerate the CHECK lines.
+ %i8 = add nuw nsw i32 %i, 1
+ %i9 = icmp eq i32 %i8, 256
+ br i1 %i9, label %bb2, label %bb5
+}
+;;==============================================================================
+;; } End saddr addressing modes
+;;==============================================================================
+
+!0 = !{!"wavefront"} ; scope operand of the first load in global_saddr_64bit_lsr_iv_multiload
+!1 = !{!"workgroup"} ; scope operand of the second load in global_saddr_64bit_lsr_iv_multiload
+!2 = !{!"agent"}
+!3 = !{!""} ; empty scope string; the GFX1250/GFX12-generic GlobalISel checks for global_saddr_64bit_lsr_iv, which passes !3, show scope:SCOPE_SYS
+
+!4 = !{i32 0, i32 1073741824} ; (1 << 30)
+!5 = !{i32 0, i32 1073741825} ; (1 << 30) + 1
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX: {{.*}}
+; GFX-ISEL: {{.*}}
+; GFX-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll
new file mode 100644
index 0000000000000..b71be3f4ca034
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.b128.ll
@@ -0,0 +1,3888 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX906-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX908-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX90a-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX9-4-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX942-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX950-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-1-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1012-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX10-3-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx11-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX11-GENERIC-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX1250-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx12-generic < %s | FileCheck -check-prefixes=GFX,GFX-SDAG,GFX12-GENERIC-SDAG %s
+
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX906-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX908-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX90a-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX9-4-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX942-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX950-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-1-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1012 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1012-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX10-3-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx11-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX11-GENERIC-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX1250-ISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx12-generic < %s | FileCheck -check-prefixes=GFX,GFX-ISEL,GFX12-GENERIC-ISEL %s
+
+;;==============================================================================
+;; A few basic test cases
+;;==============================================================================
+define void @global_store_b128_0_00(ptr addrspace(1) %addr, <4 x i32> %data) { ; Stores %data to %addr with scope operand !0; the checks expect a plain 128-bit global store with no cache/scope modifier on every target.
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_00:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_00:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_00:
+; GFX90a-SDAG: ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_00:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_00:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_00:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_00:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_00:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_00:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_00:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_00:
+; GFX90a-ISEL: ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_00:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_00:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_00:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_00:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_00:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !0) ; NOTE(review): !0 is defined outside this function; presumably !{!"wavefront"} as in the load test -- confirm
+ ret void
+}
+
+define void @global_store_b128_0_01(ptr addrspace(1) %addr, <4 x i32> %data) { ; Stores %data to %addr with scope operand !1; the checks expect sc0 on gfx9-4-generic/gfx942/gfx950 and scope:SCOPE_SE on gfx12-generic, with no modifier on the other targets.
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_01:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_01:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_01:
+; GFX90a-SDAG: ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_01:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_01:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_01:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_01:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_01:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_01:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_01:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_01:
+; GFX90a-ISEL: ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_01:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_01:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_01:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_01:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_01:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !1) ; NOTE(review): !1 is defined outside this function; presumably !{!"workgroup"} as in the load test -- confirm
+ ret void
+}
+
+define void @global_store_b128_0_10(ptr addrspace(1) %addr, <4 x i32> %data) { ; Stores %data to %addr with scope operand !2; the checks expect sc1 on gfx9-4-generic/gfx942/gfx950 and scope:SCOPE_DEV on gfx1250/gfx12-generic, with no modifier on the other targets.
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_10:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_10:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_10:
+; GFX90a-SDAG: ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_10:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_10:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_10:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_10:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_10:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_10:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_10:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_10:
+; GFX90a-ISEL: ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_10:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_10:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_10:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_10:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_10:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !2) ; NOTE(review): !2 is defined outside this function; presumably !{!"agent"} as in the load test -- confirm
+ ret void
+}
+
+define void @global_store_b128_0_11(ptr addrspace(1) %addr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX9-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_b128_0_11:
+; GFX906-SDAG: ; %bb.0: ; %entry
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_b128_0_11:
+; GFX908-SDAG: ; %bb.0: ; %entry
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_b128_0_11:
+; GFX90a-SDAG: ; %bb.0: ; %entry
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX9-4-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_b128_0_11:
+; GFX942-SDAG: ; %bb.0: ; %entry
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_b128_0_11:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX10-1-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_b128_0_11:
+; GFX1012-SDAG: ; %bb.0: ; %entry
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX10-3-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX11-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_b128_0_11:
+; GFX1250-SDAG: ; %bb.0: ; %entry
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_b128_0_11:
+; GFX12-GENERIC-SDAG: ; %bb.0: ; %entry
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX9-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_b128_0_11:
+; GFX906-ISEL: ; %bb.0: ; %entry
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_b128_0_11:
+; GFX908-ISEL: ; %bb.0: ; %entry
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_b128_0_11:
+; GFX90a-ISEL: ; %bb.0: ; %entry
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX9-4-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_b128_0_11:
+; GFX942-ISEL: ; %bb.0: ; %entry
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_b128_0_11:
+; GFX950-ISEL: ; %bb.0: ; %entry
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX10-1-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_b128_0_11:
+; GFX1012-ISEL: ; %bb.0: ; %entry
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX10-3-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX11-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_b128_0_11:
+; GFX1250-ISEL: ; %bb.0: ; %entry
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_b128_0_11:
+; GFX12-GENERIC-ISEL: ; %bb.0: ; %entry
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[2:5], off scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !3)
+ ret void
+}
+
+;;==============================================================================
+;; Signed offset addressing modes (derived from global-saddr-store.ll) {
+;;==============================================================================
+
+;; Stores %data (<4 x i32>) with llvm.amdgcn.global.store.b128 to %sbase plus a
+;; zero-extended i32 byte offset that is itself loaded from %voffset.ptr; no
+;; constant offset is added. Every check prefix below expects the store to use
+;; a VGPR-pair address with "off" and to carry no scope/cache modifier.
+;; NOTE(review): metadata !0 is defined outside this view -- confirm which
+;; scope it names.
+define void @global_store_i8_zext_vgpr(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep0, <4 x i32> %data, metadata !0)
+ ret void
+}
+
+;; Stores %data (<4 x i32>) with llvm.amdgcn.global.store.b128 to %sbase plus
+;; the zero-extended i32 %voffset argument, followed by a constant -128 byte
+;; GEP. Every check prefix below expects the -128 to be folded into the store's
+;; immediate ("offset:-128"). With metadata !1 the GFX9-4-GENERIC/GFX942/GFX950
+;; checks additionally expect "sc0" and the GFX12-GENERIC checks expect
+;; "scope:SCOPE_SE"; the remaining prefixes expect no scope/cache modifier.
+;; NOTE(review): metadata !1 is defined outside this view -- confirm which
+;; scope it names.
+define void @global_store_v4i32_zext_vgpr_offset_neg128(ptr addrspace(1) %sbase, i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v7, v6
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v6, v5
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX90a-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-128
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v6
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-128 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v7, v6
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v6, v5
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-128 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[3:6], off offset:-128
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off offset:-128
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[3:6], off offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v10, v5
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v11, v6
+; GFX90a-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:-128
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v10, v5
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v11, v6
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:-128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v10, v5
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v11, v6
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:-128 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v10, v5
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v11, v6
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:-128 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:-128
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[3:6], off offset:-128
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
+; GFX1250-ISEL-NEXT: v_dual_mov_b32 v10, v5 :: v_dual_mov_b32 v11, v6
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[8:11], off offset:-128
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[3:6], off offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !1)
+ ret void
+}
+
+;; Maximum positive offset on gfx10: %sbase + zext(i32 loaded via %voffset.ptr) + 2047; every check prefix below expects the constant folded as offset:2047. NOTE(review): "i8" in the name does not match the <4 x i32> payload - confirm.
+define void @global_store_i8_zext_vgpr_offset_2047(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off offset:2047
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off offset:2047
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !2)
+ ret void
+}
+
+;; Maximum negative offset on gfx10: %sbase + zext(i32 loaded via %voffset.ptr) - 2048; every check prefix below expects the constant folded as offset:-2048. NOTE(review): "i8" in the name does not match the <4 x i32> payload - confirm.
+define void @global_store_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX950-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off offset:-2048
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-SDAG-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX90a-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v2, v[2:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:-2048
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off offset:-2048
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[0:1], v[4:7], off offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !3)
+ ret void
+}
+;;==============================================================================
+;; } end signed offset addressing modes
+;;==============================================================================
+
+;;==============================================================================
+;; Various saddr addressing modes (derived from global-saddr-load.ll) {
+;;==============================================================================
+
+define void @global_store_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1]
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1]
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1]
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1]
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1]
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1]
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep0, <4 x i32> %data, metadata !0)
+ ret void
+}
+
+define void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v3, v2
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v2, v1
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-128
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, v2
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-128 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-128 sc0
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v5, v4
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v4, v3
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
+; GFX950-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-128 sc0
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:-128
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:-128
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v7, v2
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:-128
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:-128 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v7, v2
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX942-ISEL-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:-128 sc0
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v7, v2
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v8, v3
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v9, v4
+; GFX950-ISEL-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:-128 sc0
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17] offset:-128
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:-128
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
+; GFX1250-ISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
+; GFX1250-ISEL-NEXT: global_store_b128 v0, v[6:9], s[0:1] offset:-128
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:-128 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !1)
+ ret void
+}
+
+;; Maximum positive offset on gfx10
+define void @global_store_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:2047
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:2047 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:2047
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:2047
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:2047 scope:SCOPE_DEV
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !2)
+ ret void
+}
+
+;; Maximum negative offset on gfx10.
+;; Stores %data through llvm.amdgcn.global.store.b128 to the address
+;; %sbase + zext(i32 loaded from %voffset.ptr) - 2048. The checks below expect
+;; every target to select the SGPR-base (saddr) form of the store with the
+;; constant folded into the instruction as "offset:-2048".
+;; NOTE(review): !3 is defined outside this excerpt; the sc0 sc1 / SCOPE_SYS
+;; modifiers in the checks suggest a system-wide scope -- confirm.
+define void @global_store_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX906-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX906-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX908-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX908-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX90a-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-1-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX1012-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-3-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:-2048
+; GFX11-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX906-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX906-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX908-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX908-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX90a-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX942-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX942-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:-2048 sc0 sc1
+; GFX950-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX950-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-1-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1012-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX1012-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX1012-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:-2048
+; GFX10-3-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:-2048
+; GFX11-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX1250-ISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX1250-ISEL-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_expcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_samplecnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_load_b32 v0, v[0:1], off
+; GFX12-GENERIC-ISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:-2048 scope:SCOPE_SYS
+; GFX12-GENERIC-ISEL-NEXT: s_setpc_b64 s[30:31]
+ %voffset = load i32, ptr addrspace(1) %voffset.ptr
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !3)
+ ret void
+}
+
+;;------------------------------------------------------------------------------
+;; Uniformity edge cases
+;;------------------------------------------------------------------------------
+
+;; Internal addrspace(3) global holding a `ptr addrspace(1)`; initialized to poison.
+;; NOTE(review): presumably read by the uniformity tests that follow to get a
+;; uniform base pointer that lives in VGPRs -- confirm against their IR bodies.
+@ptr.in.lds = internal addrspace(3) global ptr addrspace(1) poison
+
+;; Base pointer is uniform, but also in VGPRs
+define amdgpu_kernel void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX9-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX9-GENERIC-SDAG-NEXT: s_nop 4
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX9-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX906-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX906-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX906-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX906-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX906-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX906-SDAG-NEXT: s_nop 4
+; GFX906-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX906-SDAG-NEXT: s_endpgm
+;
+; GFX908-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX908-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX908-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX908-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX908-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX908-SDAG-NEXT: s_nop 4
+; GFX908-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX908-SDAG-NEXT: s_endpgm
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX90a-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX90a-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX90a-SDAG-NEXT: v_pk_mov_b32 v[6:7], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-SDAG-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX90a-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX90a-SDAG-NEXT: s_nop 4
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX90a-SDAG-NEXT: s_endpgm
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX9-4-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 4
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX942-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX942-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
+; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX942-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX942-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX942-SDAG-NEXT: s_nop 4
+; GFX942-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX942-SDAG-NEXT: s_endpgm
+;
+; GFX950-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX950-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
+; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX950-SDAG-NEXT: s_nop 4
+; GFX950-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX10-1-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX10-1-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX10-1-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: s_clause 0x1
+; GFX1012-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX1012-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX1012-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX1012-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX1012-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX1012-SDAG-NEXT: s_endpgm
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX10-3-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX10-3-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX10-3-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX11-GENERIC-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: ds_load_b64 v[4:5], v0
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX11-GENERIC-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-SDAG-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX11-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v6, s6
+; GFX1250-SDAG-NEXT: ds_load_b64 v[4:5], v0
+; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX1250-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX12-GENERIC-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: ds_load_b64 v[4:5], v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX12-GENERIC-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-SDAG-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX12-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX12-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX9-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX906-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX906-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX906-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX906-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX906-ISEL-NEXT: s_endpgm
+;
+; GFX908-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX908-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX908-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX908-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX908-ISEL-NEXT: s_endpgm
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX90a-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX90a-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX90a-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
+; GFX90a-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX90a-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX90a-ISEL-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX90a-ISEL-NEXT: s_endpgm
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX9-4-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX942-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX942-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX942-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX942-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX942-ISEL-NEXT: s_endpgm
+;
+; GFX950-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX950-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX950-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX950-ISEL-NEXT: s_endpgm
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX10-1-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX10-1-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-1-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: s_clause 0x1
+; GFX1012-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX1012-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX1012-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX1012-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX1012-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX1012-ISEL-NEXT: s_endpgm
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX10-3-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX10-3-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-3-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX11-GENERIC-ISEL-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-ISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX11-GENERIC-ISEL-NEXT: ds_load_b64 v[0:1], v0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-ISEL-NEXT: s_clause 0x1
+; GFX1250-ISEL-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX1250-ISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX1250-ISEL-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX1250-ISEL-NEXT: s_wait_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-ISEL-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX1250-ISEL-NEXT: s_endpgm
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX12-GENERIC-ISEL-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-ISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX12-GENERIC-ISEL-NEXT: ds_load_b64 v[0:1], v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX12-GENERIC-ISEL-NEXT: s_endpgm
+ %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep0, <4 x i32> %data, metadata !0)
+ ret void
+}
+
+;; Base pointer is uniform but is loaded from LDS, so it lives in VGPRs; the store address adds a constant -120 byte offset.
+define amdgpu_kernel void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, <4 x i32> %data) {
+; GFX9-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-GENERIC-SDAG: ; %bb.0:
+; GFX9-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX9-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX9-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX9-GENERIC-SDAG-NEXT: s_nop 4
+; GFX9-GENERIC-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX9-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX906-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX906-SDAG: ; %bb.0:
+; GFX906-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX906-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX906-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX906-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX906-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX906-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX906-SDAG-NEXT: s_nop 4
+; GFX906-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX906-SDAG-NEXT: s_endpgm
+;
+; GFX908-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX908-SDAG: ; %bb.0:
+; GFX908-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX908-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX908-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX908-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX908-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX908-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX908-SDAG-NEXT: s_nop 4
+; GFX908-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX908-SDAG-NEXT: s_endpgm
+;
+; GFX90a-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX90a-SDAG: ; %bb.0:
+; GFX90a-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX90a-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX90a-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX90a-SDAG-NEXT: v_pk_mov_b32 v[6:7], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-SDAG-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX90a-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX90a-SDAG-NEXT: s_nop 4
+; GFX90a-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120
+; GFX90a-SDAG-NEXT: s_endpgm
+;
+; GFX9-4-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-4-GENERIC-SDAG: ; %bb.0:
+; GFX9-4-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
+; GFX9-4-GENERIC-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX9-4-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX9-4-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX9-4-GENERIC-SDAG-NEXT: s_nop 4
+; GFX9-4-GENERIC-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120 sc0
+; GFX9-4-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX942-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX942-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
+; GFX942-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX942-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX942-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX942-SDAG-NEXT: s_nop 4
+; GFX942-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120 sc0
+; GFX942-SDAG-NEXT: s_endpgm
+;
+; GFX950-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-SDAG-NEXT: ds_read_b64 v[0:1], v0
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, s6
+; GFX950-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
+; GFX950-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; GFX950-SDAG-NEXT: s_nop 4
+; GFX950-SDAG-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1] offset:-120 sc0
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX10-1-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-1-GENERIC-SDAG: ; %bb.0:
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX10-1-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX10-1-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX10-1-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX10-1-GENERIC-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX10-1-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX1012-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1012-SDAG: ; %bb.0:
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-SDAG-NEXT: s_clause 0x1
+; GFX1012-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX1012-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX1012-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX1012-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX1012-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX1012-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX1012-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX1012-SDAG-NEXT: s_endpgm
+;
+; GFX10-3-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-3-GENERIC-SDAG: ; %bb.0:
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX10-3-GENERIC-SDAG-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-SDAG-NEXT: ds_read_b64 v[4:5], v0
+; GFX10-3-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-SDAG-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX10-3-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX10-3-GENERIC-SDAG-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:-120
+; GFX10-3-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX11-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX11-GENERIC-SDAG: ; %bb.0:
+; GFX11-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX11-GENERIC-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-SDAG-NEXT: ds_load_b64 v[4:5], v0
+; GFX11-GENERIC-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX11-GENERIC-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-GENERIC-SDAG-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX11-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX11-GENERIC-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] offset:-120
+; GFX11-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v6, s6
+; GFX1250-SDAG-NEXT: ds_load_b64 v[4:5], v0
+; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX1250-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] offset:-120
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX12-GENERIC-SDAG-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX12-GENERIC-SDAG: ; %bb.0:
+; GFX12-GENERIC-SDAG-NEXT: s_clause 0x1
+; GFX12-GENERIC-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-SDAG-NEXT: ds_load_b64 v[4:5], v0
+; GFX12-GENERIC-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_mov_b32_e32 v6, s6
+; GFX12-GENERIC-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GENERIC-SDAG-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX12-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s0, v4
+; GFX12-GENERIC-SDAG-NEXT: v_readfirstlane_b32 s1, v5
+; GFX12-GENERIC-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] offset:-120 scope:SCOPE_SE
+; GFX12-GENERIC-SDAG-NEXT: s_endpgm
+;
+; GFX9-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-GENERIC-ISEL: ; %bb.0:
+; GFX9-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX9-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX9-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX9-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX906-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX906-ISEL: ; %bb.0:
+; GFX906-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX906-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX906-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX906-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX906-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX906-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX906-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX906-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX906-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX906-ISEL-NEXT: s_endpgm
+;
+; GFX908-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX908-ISEL: ; %bb.0:
+; GFX908-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX908-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX908-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX908-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX908-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX908-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX908-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX908-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX908-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX908-ISEL-NEXT: s_endpgm
+;
+; GFX90a-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX90a-ISEL: ; %bb.0:
+; GFX90a-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX90a-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX90a-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX90a-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX90a-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
+; GFX90a-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX90a-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX90a-ISEL-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-ISEL-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX90a-ISEL-NEXT: s_endpgm
+;
+; GFX9-4-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9-4-GENERIC-ISEL: ; %bb.0:
+; GFX9-4-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX9-4-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-4-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-4-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX9-4-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX9-4-GENERIC-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX9-4-GENERIC-ISEL-NEXT: s_nop 1
+; GFX9-4-GENERIC-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX9-4-GENERIC-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX9-4-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120 sc0
+; GFX9-4-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX942-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX942-ISEL: ; %bb.0:
+; GFX942-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX942-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX942-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX942-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX942-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX942-ISEL-NEXT: s_nop 1
+; GFX942-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120 sc0
+; GFX942-ISEL-NEXT: s_endpgm
+;
+; GFX950-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX950-ISEL: ; %bb.0:
+; GFX950-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX950-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX950-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX950-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX950-ISEL-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
+; GFX950-ISEL-NEXT: s_nop 1
+; GFX950-ISEL-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX950-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120 sc0
+; GFX950-ISEL-NEXT: s_endpgm
+;
+; GFX10-1-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-1-GENERIC-ISEL: ; %bb.0:
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-1-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX10-1-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-1-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-1-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX10-1-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX10-1-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-1-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-1-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-1-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX10-1-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX1012-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1012-ISEL: ; %bb.0:
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1012-ISEL-NEXT: s_clause 0x1
+; GFX1012-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX1012-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX1012-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX1012-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX1012-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX1012-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1012-ISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1012-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX1012-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX1012-ISEL-NEXT: s_endpgm
+;
+; GFX10-3-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10-3-GENERIC-ISEL: ; %bb.0:
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-3-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX10-3-GENERIC-ISEL-NEXT: s_load_dword s6, s[4:5], 0x24
+; GFX10-3-GENERIC-ISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-3-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX10-3-GENERIC-ISEL-NEXT: ds_read_b64 v[0:1], v0
+; GFX10-3-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX10-3-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-3-GENERIC-ISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-3-GENERIC-ISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off offset:-120
+; GFX10-3-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX11-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX11-GENERIC-ISEL: ; %bb.0:
+; GFX11-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX11-GENERIC-ISEL-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX11-GENERIC-ISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX11-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX11-GENERIC-ISEL-NEXT: ds_load_b64 v[0:1], v0
+; GFX11-GENERIC-ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX11-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX11-GENERIC-ISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX11-GENERIC-ISEL-NEXT: global_store_b128 v[4:5], v[0:3], off offset:-120
+; GFX11-GENERIC-ISEL-NEXT: s_endpgm
+;
+; GFX1250-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1250-ISEL: ; %bb.0:
+; GFX1250-ISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-ISEL-NEXT: s_clause 0x1
+; GFX1250-ISEL-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX1250-ISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1250-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX1250-ISEL-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX1250-ISEL-NEXT: s_wait_dscnt 0x0
+; GFX1250-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX1250-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX1250-ISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-ISEL-NEXT: global_store_b128 v[4:5], v[0:3], off offset:-120
+; GFX1250-ISEL-NEXT: s_endpgm
+;
+; GFX12-GENERIC-ISEL-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX12-GENERIC-ISEL: ; %bb.0:
+; GFX12-GENERIC-ISEL-NEXT: s_clause 0x1
+; GFX12-GENERIC-ISEL-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX12-GENERIC-ISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX12-GENERIC-ISEL-NEXT: s_mov_b32 s7, 0
+; GFX12-GENERIC-ISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX12-GENERIC-ISEL-NEXT: ds_load_b64 v[0:1], v0
+; GFX12-GENERIC-ISEL-NEXT: s_wait_dscnt 0x0
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
+; GFX12-GENERIC-ISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GENERIC-ISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
+; GFX12-GENERIC-ISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX12-GENERIC-ISEL-NEXT: global_store_b128 v[4:5], v[0:3], off offset:-120 scope:SCOPE_SE
+; GFX12-GENERIC-ISEL-NEXT: s_endpgm
+ %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
+ %zext.offset = zext i32 %voffset to i64
+ %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -120
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %gep1, <4 x i32> %data, metadata !1)
+ ret void
+}
+
+;;==============================================================================
+;; } End saddr addressing modes
+;;==============================================================================
+
+
+!0 = !{!"wavefront"}
+!1 = !{!"workgroup"}
+!2 = !{!"agent"}
+!3 = !{!""}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX: {{.*}}
+; GFX-ISEL: {{.*}}
+; GFX-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll b/llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll
new file mode 100644
index 0000000000000..af4b9dc1d98d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-global-load.ll
@@ -0,0 +1,25 @@
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx602 < %s 2>&1 | FileCheck -check-prefixes=GFX602 %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx705 < %s 2>&1 | FileCheck -check-prefixes=GFX705 %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx810 < %s 2>&1 | FileCheck -check-prefixes=GFX810 %s
+
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx602 < %s 2>&1 | FileCheck -check-prefixes=GFX602-GBL-ISEL %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx705 < %s 2>&1 | FileCheck -check-prefixes=GFX705-GBL-ISEL %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx810 < %s 2>&1 | FileCheck -check-prefixes=GFX810-GBL-ISEL %s
+
+; Check that llc fails with a "cannot select" error for
+; llvm.amdgcn.global.load.b128 on gfx602, gfx705 and gfx810, with both
+; SelectionDAG (-global-isel=0) and GlobalISel (-global-isel=1).
+
+define <4 x i32> @global_load_b128(ptr addrspace(1) %addr) {
+; GFX602: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.b128
+; GFX705: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.b128
+; GFX810: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.b128
+
+; GFX602-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.load.b128)
+; GFX705-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.load.b128)
+; GFX810-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.load.b128)
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0)
+ ret <4 x i32> %data
+}
+
+!0 = !{!""}
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll b/llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll
new file mode 100644
index 0000000000000..a24c17f0e9905
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-global-store.ll
@@ -0,0 +1,36 @@
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx602 < %s 2>&1 | FileCheck -check-prefixes=GFX602 %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx705 < %s 2>&1 | FileCheck -check-prefixes=GFX705 %s
+; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx810 < %s 2>&1 | FileCheck -check-prefixes=GFX810 %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-generic < %s 2>&1 | FileCheck -check-prefixes=GFX9-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s 2>&1 | FileCheck -check-prefixes=GFX9-4-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-1-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-3-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx11-generic < %s 2>&1 | FileCheck -check-prefixes=GFX11-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx12-generic < %s 2>&1 | FileCheck -check-prefixes=GFX12-GENERIC %s
+; xxx: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s 2>&1 | FileCheck -check-prefixes=GFX1250 %s
+
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx602 < %s 2>&1 | FileCheck -check-prefixes=GFX602-GBL-ISEL %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx705 < %s 2>&1 | FileCheck -check-prefixes=GFX705-GBL-ISEL %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx810 < %s 2>&1 | FileCheck -check-prefixes=GFX810-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-generic < %s 2>&1 | FileCheck -check-prefixes=GFX9-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s 2>&1 | FileCheck -check-prefixes=GFX9-4-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-1-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s 2>&1 | FileCheck -check-prefixes=GFX10-3-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx11-generic < %s 2>&1 | FileCheck -check-prefixes=GFX11-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx12-generic < %s 2>&1 | FileCheck -check-prefixes=GFX12-GENERIC-GBL-ISEL %s
+; xxx: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s 2>&1 | FileCheck -check-prefixes=GFX1250-GBL-ISEL %s
+
+; Negative test: the store intrinsic is called with scope metadata !0, and every
+; check line inside this function expects llc to abort with a "cannot select"
+; error on gfx602, gfx705 and gfx810. The plain prefixes belong to the
+; -global-isel=0 invocations and the *-GBL-ISEL prefixes to -global-isel=1.
+define void @global_store_b128(ptr addrspace(1) %addr, <4 x i32> %data) {
+; GFX602: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.store.b128
+; GFX705: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.store.b128
+; GFX810: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.store.b128
+
+; GFX602-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.store.b128)
+; GFX705-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.store.b128)
+; GFX810-GBL-ISEL: LLVM ERROR: cannot select: {{.*}} intrinsic(@llvm.amdgcn.global.store.b128)
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !0)
+ ret void
+}
+
+!0 = !{!""}
diff --git a/llvm/test/Verifier/amdgpu-intrinsics.ll b/llvm/test/Verifier/amdgpu-intrinsics.ll
new file mode 100644
index 0000000000000..b774c4cb12fbd
--- /dev/null
+++ b/llvm/test/Verifier/amdgpu-intrinsics.ll
@@ -0,0 +1,66 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; ---------- i32 metadata ------------------------------------------------------
+; CHECK: global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call <4 x i32> @llvm.amdgcn.global.load.b128({{.*}})
+; CHECK-NEXT: metadata i32 1
+; Load with scope operand !3, which is !{i32 1}: an integer where the
+; diagnostic above requires a metadata string.
+define <4 x i32> @global_load_b128_00(ptr addrspace(1) %addr) {
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !3)
+ ret <4 x i32> %data
+}
+
+; CHECK: global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call void @llvm.amdgcn.global.store.b128({{.*}})
+; CHECK-NEXT: metadata i32 1
+; Store with scope operand !3, which is !{i32 1}: an integer where the
+; diagnostic above requires a metadata string.
+define void @global_store_b128_00(ptr addrspace(1) %addr, <4 x i32> %data) {
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !3)
+ ret void
+}
+
+; ---------- nested tuple metadata ---------------------------------------------
+; CHECK: global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call <4 x i32> @llvm.amdgcn.global.load.b128({{.*}})
+; CHECK-NEXT: metadata !0
+; Load with scope operand !0, which is !{!1}: its only operand is another
+; metadata node (!1) rather than a string, so the diagnostic above is expected.
+define <4 x i32> @global_load_b128_01(ptr addrspace(1) %addr) {
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !0)
+ ret <4 x i32> %data
+}
+
+; CHECK: global load/store intrinsics require that the last argument is a metadata string
+; CHECK-NEXT: call void @llvm.amdgcn.global.store.b128({{.*}})
+; CHECK-NEXT: metadata !0
+; Store with scope operand !0, which is !{!1}: its only operand is another
+; metadata node (!1) rather than a string, so the diagnostic above is expected.
+define void @global_store_b128_01(ptr addrspace(1) %addr, <4 x i32> %data) {
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !0)
+ ret void
+}
+
+; ---------- invalid string metadata -------------------------------------------
+; CHECK: 'wave' is not a valid scope for global load/store intrinsics
+; CHECK-NEXT: call <4 x i32> @llvm.amdgcn.global.load.b128({{.*}})
+; CHECK-NEXT: metadata !2
+; Load with scope operand !2, which is !{!"wave"}: a well-formed metadata
+; string that the diagnostic above rejects as an invalid scope name.
+define <4 x i32> @global_load_b128_02(ptr addrspace(1) %addr) {
+entry:
+ %data = call <4 x i32> @llvm.amdgcn.global.load.b128(ptr addrspace(1) %addr, metadata !2)
+ ret <4 x i32> %data
+}
+
+; CHECK: 'wave' is not a valid scope for global load/store intrinsics
+; CHECK-NEXT: call void @llvm.amdgcn.global.store.b128({{.*}})
+; CHECK-NEXT: metadata !2
+; Store with scope operand !2, which is !{!"wave"}: a well-formed metadata
+; string that the diagnostic above rejects as an invalid scope name.
+define void @global_store_b128_02(ptr addrspace(1) %addr, <4 x i32> %data) {
+entry:
+ call void @llvm.amdgcn.global.store.b128(ptr addrspace(1) %addr, <4 x i32> %data, metadata !2)
+ ret void
+}
+
+
+!0 = !{!1}
+!1 = !{!""}
+
+!2 = !{!"wave"}
+
+!3 = !{i32 1}
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 8540faed34e5d..df326374deb46 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1472,6 +1472,12 @@ Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy,
return Error::success();
}
+ llvm::MVT::SimpleValueType STy = VTy.getMachineValueType().SimpleTy;
+ if (STy == MVT::Metadata) {
+ addPredicate<MachineOperandTypeMatcher>(MachineOperand::MO_Metadata);
+ return Error::success();
+ }
+
auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy);
if (!OpTyOrNone)
return failUnsupported("unsupported type");
@@ -1937,6 +1943,17 @@ bool InstructionOperandMatcher::isHigherPriorityThan(
return false;
}
+//===- MachineOperandTypeMatcher -----------------------------------------===//
+
+void MachineOperandTypeMatcher::emitPredicateOpcodes(MatchTable &Table,
+                                                     RuleMatcher &Rule) const {
+  // Record layout: opcode, instruction ID, operand index, expected operand kind.
+  Table << MatchTable::Opcode("GIM_CheckMachineOperandType");
+  Table << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID);
+  Table << MatchTable::Comment("Op") << MatchTable::ULEB128Value(OpIdx);
+  Table << MatchTable::Comment("Ty") << MatchTable::ULEB128Value(MOTy);
+  Table << MatchTable::LineBreak;
+}
+
//===- OperandRenderer ----------------------------------------------------===//
OperandRenderer::~OperandRenderer() = default;
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
index 6a8017894a486..d770f6ec653b1 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SaveAndRestore.h"
@@ -837,6 +838,7 @@ class PredicateMatcher {
OPM_MBB,
OPM_RecordNamedOperand,
OPM_RecordRegType,
+ OPM_MOType,
};
protected:
@@ -1926,6 +1928,22 @@ class InstructionOperandMatcher : public OperandPredicateMatcher {
}
};
+/// Operand predicate that checks the kind of a MachineOperand (for example
+/// MachineOperand::MO_Metadata) by emitting a GIM_CheckMachineOperandType
+/// record for the given instruction variable and operand index.
+class MachineOperandTypeMatcher : public OperandPredicateMatcher {
+  /// The operand kind the matched operand must have.
+  const MachineOperand::MachineOperandType MOTy;
+
+public:
+  MachineOperandTypeMatcher(unsigned InsnVarID, unsigned OpIdx,
+                            MachineOperand::MachineOperandType MOTy)
+      : OperandPredicateMatcher(OPM_MOType, InsnVarID, OpIdx), MOTy(MOTy) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == OPM_MOType;
+  }
+
+  /// Two matchers are interchangeable only when they also expect the same
+  /// operand kind. The base-class comparison cannot see MOTy, so it is
+  /// compared here to keep predicates with different kinds from being merged.
+  bool isIdentical(const PredicateMatcher &B) const override {
+    return OperandPredicateMatcher::isIdentical(B) &&
+           MOTy == cast<MachineOperandTypeMatcher>(&B)->MOTy;
+  }
+
+  /// Appends the GIM_CheckMachineOperandType record to \p Table.
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override;
+};
+
//===- Actions ------------------------------------------------------------===//
class OperandRenderer {
public:
More information about the llvm-commits
mailing list