[llvm] AMDGPU/GlobalISel: Partially move constant selection to patterns (PR #100786)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 11:02:13 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100786
>From 49c8245d622973e3491a7f8233c8eecc1d3a05c0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 31 Jul 2020 16:32:41 -0400
Subject: [PATCH 1/2] AMDGPU/GlobalISel: Partially move constant selection to
patterns
This is still relying on the manual code for splitting 64-bit
constants, and handling pointers.
We were missing some of the tablegen patterns for all immediate types,
so this has some side effect DAG path improvements. This also reduces
the diff in the 2 selector outputs.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 +-
.../AMDGPU/AMDGPUInstructionSelector.cpp | 27 +-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 13 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 20 +-
.../AMDGPU/GlobalISel/atomicrmw_fmax.ll | 8 +-
.../AMDGPU/GlobalISel/atomicrmw_fmin.ll | 8 +-
...-divergent-i1-phis-no-lane-mask-merging.ll | 29 +-
.../GlobalISel/divergent-control-flow.ll | 8 +-
.../CodeGen/AMDGPU/GlobalISel/floor.f64.ll | 72 +-
.../GlobalISel/global-atomic-fadd.f64.ll | 32 +-
.../GlobalISel/inst-select-constant.mir | 253 ++++-
.../GlobalISel/inst-select-fconstant.mir | 14 +-
.../GlobalISel/inst-select-load-constant.mir | 30 +-
.../inst-select-load-global-saddr.mir | 192 ++--
.../GlobalISel/inst-select-load-smrd.mir | 12 +-
.../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 42 +-
.../GlobalISel/llvm.amdgcn.intersect_ray.ll | 29 +-
.../GlobalISel/llvm.amdgcn.mfma.gfx90a.ll | 2 +-
.../GlobalISel/llvm.amdgcn.rsq.clamp.ll | 104 ++-
.../GlobalISel/llvm.amdgcn.set.inactive.ll | 6 +-
.../CodeGen/AMDGPU/GlobalISel/llvm.memset.ll | 7 +-
.../CodeGen/AMDGPU/GlobalISel/mubuf-global.ll | 165 ++--
.../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 6 +-
.../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 44 +-
.../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll | 6 +-
.../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 6 +-
.../CodeGen/AMDGPU/frame-index-elimination.ll | 4 +-
.../CodeGen/AMDGPU/insert_vector_dynelt.ll | 18 +-
llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll | 128 +--
.../AMDGPU/lower-lds-struct-aa-memcpy.ll | 21 +-
llvm/test/CodeGen/AMDGPU/mmra.ll | 5 +-
llvm/test/CodeGen/AMDGPU/offset-split-flat.ll | 879 +++++-------------
.../CodeGen/AMDGPU/offset-split-global.ll | 666 ++++---------
llvm/test/CodeGen/AMDGPU/roundeven.ll | 18 +-
.../CodeGen/AMDGPU/scalar_to_vector_v2x16.ll | 12 +-
.../AMDGPU/vgpr-spill-placement-issue61083.ll | 3 +-
37 files changed, 1129 insertions(+), 1768 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 537d3a43aa9fa..825c1de6c91f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -398,8 +398,10 @@ def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm">,
def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
GISDNodeXFormEquiv<NegateImm>;
-def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
+def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastFPImm32">,
GISDNodeXFormEquiv<bitcast_fpimm_to_i32>;
+def gi_bitcast_fpimm_to_i64 : GICustomOperandRenderer<"renderBitcastFPImm64">,
+ GISDNodeXFormEquiv<bitcast_fpimm_to_i64>;
def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
GISDNodeXFormEquiv<IMMPopCount>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aaa292291334c..9a73629b0f0cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2504,10 +2504,19 @@ bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+
+ // FIXME: Relying on manual selection for 64-bit case, and pointer typed
+ // constants.
MachineBasicBlock *BB = I.getParent();
MachineOperand &ImmOp = I.getOperand(1);
Register DstReg = I.getOperand(0).getReg();
- unsigned Size = MRI->getType(DstReg).getSizeInBits();
+ LLT Ty = MRI->getType(DstReg);
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 64 || Ty.isPointer()) &&
+ "patterns should have selected this");
+
bool IsFP = false;
// The AMDGPU backend only supports Imm operands and not CImm or FPImm.
@@ -5606,18 +5615,12 @@ void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}
-void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
- assert(OpIdx == -1);
-
+void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
const MachineOperand &Op = MI.getOperand(1);
- if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
- MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- else {
- assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
- MIB.addImm(Op.getCImm()->getSExtValue());
- }
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
+ MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
}
void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 43ed210508d33..0e98d25e36a3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -333,8 +333,17 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
- void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
+ void renderBitcastFPImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
+ void renderBitcastFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const {
+ renderBitcastFPImm(MIB, MI, OpIdx);
+ }
+ void renderBitcastFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const {
+ renderBitcastFPImm(MIB, MI, OpIdx);
+ }
void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 00a8b159c9526..4ab903f8c9ad0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -828,7 +828,9 @@ def InlineImmFP64 : FPImmLeaf<f64, [{
class VGPRImm <dag frag> : PatLeaf<frag, [{
return isVGPRImm(N);
-}]>;
+}]> {
+ let GISelPredicateCode = [{return true;}];
+}
def NegateImm : SDNodeXForm<imm, [{
return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 15078bc941292..d2101654d2acb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2163,6 +2163,11 @@ def : GCNPat <
(S_MOV_B32 $ga)
>;
+def : GCNPat <
+ (VGPRImm<(i16 imm)>:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+>;
+
// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
@@ -2229,20 +2234,15 @@ def : GCNPat <
(S_MOV_B64 InlineImm64:$imm)
>;
-// XXX - Should this use a s_cmp to set SCC?
-
// Set to sign-extended 64-bit value (true = -1, false = 0)
-def : GCNPat <
- (i1 imm:$imm),
- (S_MOV_B64 (i64 (as_i64imm $imm)))
-> {
+// Set to sign-extended 64-bit value (true = -1, false = 0)
+def : GCNPat <(i1 imm:$imm),
+ (S_MOV_B64 imm:$imm)> {
let WaveSizePredicate = isWave64;
}
-def : GCNPat <
- (i1 imm:$imm),
- (S_MOV_B32 (i32 (as_i32imm $imm)))
-> {
+def : GCNPat <(i1 imm:$imm),
+ (S_MOV_B32 imm:$imm)> {
let WaveSizePredicate = isWave32;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
index c701e873fdd2c..1270e69998e6c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
@@ -501,8 +501,8 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB4_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -710,8 +710,8 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v2
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -936,7 +936,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -1150,7 +1150,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
index 90110e6e0c09e..e36161a936e36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
@@ -501,8 +501,8 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB4_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -710,8 +710,8 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v2
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -936,7 +936,7 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -1150,7 +1150,7 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[2:3]
-; GFX7-NEXT: s_mov_b64 s[8:9], 0
+; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index d4d5cb18bbd30..966a481b6594d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -226,15 +226,16 @@ exit:
define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3 x i32> inreg %.WorkgroupId, <3 x i32> %.LocalInvocationId) #0 {
; GFX10-LABEL: single_lane_execution_attribute:
; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_mov_b32 s6, 0
; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_mov_b32 s12, 0
-; GFX10-NEXT: s_mov_b32 s13, -1
-; GFX10-NEXT: s_mov_b32 s2, s0
-; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], s[12:13]
-; GFX10-NEXT: s_mov_b32 s3, s12
+; GFX10-NEXT: s_mov_b32 s7, -1
+; GFX10-NEXT: s_mov_b32 s2, s1
+; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0
-; GFX10-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
-; GFX10-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
+; GFX10-NEXT: s_or_b64 s[12:13], s[4:5], s[0:1]
+; GFX10-NEXT: s_mov_b32 s3, -1
+; GFX10-NEXT: s_load_dwordx8 s[4:11], s[12:13], 0x0
; GFX10-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
; GFX10-NEXT: v_and_b32_e32 v3, 1, v1
@@ -248,8 +249,8 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
; GFX10-NEXT: s_cbranch_vccnz .LBB4_4
; GFX10-NEXT: ; %bb.1: ; %.preheader.preheader
-; GFX10-NEXT: v_mov_b32_e32 v3, s12
-; GFX10-NEXT: v_mov_b32_e32 v4, s12
+; GFX10-NEXT: v_mov_b32_e32 v3, s1
+; GFX10-NEXT: v_mov_b32_e32 v4, s1
; GFX10-NEXT: .LBB4_2: ; %.preheader
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: buffer_load_dword v5, v3, s[4:7], 0 offen
@@ -261,17 +262,17 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
; GFX10-NEXT: s_cbranch_vccnz .LBB4_2
; GFX10-NEXT: ; %bb.3: ; %.preheader._crit_edge
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v2
-; GFX10-NEXT: s_mov_b32 s13, 0
-; GFX10-NEXT: s_or_b32 s2, s0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
+; GFX10-NEXT: s_mov_b32 s3, 0
+; GFX10-NEXT: s_or_b32 s1, s0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s1
; GFX10-NEXT: .LBB4_4: ; %Flow
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s13
+; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s3
; GFX10-NEXT: s_cbranch_vccz .LBB4_6
; GFX10-NEXT: ; %bb.5: ; %.19
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX10-NEXT: v_or_b32_e32 v3, 2, v1
; GFX10-NEXT: .LBB4_6: ; %.22
-; GFX10-NEXT: v_add_lshl_u32 v0, v0, s1, 2
+; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, 2
; GFX10-NEXT: buffer_store_dword v3, v0, s[8:11], 0 offen
; GFX10-NEXT: s_endpgm
.entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 5515de0cd2fee..f52b7c635a66f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -193,12 +193,12 @@ bb12:
define amdgpu_kernel void @break_loop(i32 %arg) {
; CHECK-LABEL: break_loop:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_load_dword s2, s[6:7], 0x0
-; CHECK-NEXT: s_mov_b64 s[0:1], 0
+; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
+; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0
-; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT: v_subrev_u32_e32 v0, s0, v0
+; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_branch .LBB5_3
; CHECK-NEXT: .LBB5_1: ; %bb4
; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
index 34635b077cd92..e19e7823053c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
@@ -8,9 +8,9 @@ define double @v_floor_f64_ieee(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@@ -31,9 +31,9 @@ define double @v_floor_f64_ieee_nnan(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@@ -51,9 +51,9 @@ define double @v_floor_f64_ieee_fneg(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@@ -75,9 +75,9 @@ define double @v_floor_f64_nonieee(double %x) #1 {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@@ -98,9 +98,9 @@ define double @v_floor_f64_nonieee_nnan(double %x) #1 {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@@ -118,9 +118,9 @@ define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@@ -142,9 +142,9 @@ define double @v_floor_f64_fabs(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@@ -166,9 +166,9 @@ define double @v_floor_f64_fneg_fabs(double %x) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v4, -1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
@@ -190,9 +190,9 @@ define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
; GFX6-LABEL: s_floor_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
-; GFX6-NEXT: s_mov_b32 s0, -1
-; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_mov_b32_e32 v2, -1
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
@@ -214,9 +214,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
-; GFX6-NEXT: s_mov_b32 s0, -1
-; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_mov_b32_e32 v2, -1
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
@@ -239,9 +239,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
-; GFX6-NEXT: s_mov_b32 s0, -1
-; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_mov_b32_e32 v2, -1
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
@@ -264,9 +264,9 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
-; GFX6-NEXT: s_mov_b32 s0, -1
-; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
-; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_mov_b32_e32 v2, -1
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
index b54aec935bd5f..3c57832a25e32 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
@@ -434,8 +434,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
- ; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
+ ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GFX90A_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@@ -446,8 +446,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
- ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
- ; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
+ ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX90A_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
@@ -615,8 +615,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
- ; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
- ; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
+ ; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GFX940_ITERATIVE-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY8]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@@ -627,8 +627,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY11]], [[V_NOT_B32_e32_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY12]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
- ; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
- ; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
+ ; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX940_ITERATIVE-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE2]], [[COPY13]], implicit $exec
; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
@@ -837,8 +837,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
; GFX90A_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
- ; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
+ ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GFX90A_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX90A_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX90A_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@@ -849,8 +849,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
- ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
- ; GFX90A_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
+ ; GFX90A_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX90A_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX90A_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
; GFX90A_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX90A_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
@@ -1089,8 +1089,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_WRITELANE_B32_]], %subreg.sub0, [[V_WRITELANE_B32_1]], %subreg.sub1
; GFX940_ITERATIVE-NEXT: [[COPY20:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI2]], 0, [[COPY20]], 0, 0, implicit $mode, implicit $exec
- ; GFX940_ITERATIVE-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
- ; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B1]]
+ ; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GFX940_ITERATIVE-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_]]
; GFX940_ITERATIVE-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[V_MIN_U32_e64_]], [[COPY21]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
; GFX940_ITERATIVE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
@@ -1101,8 +1101,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY24]], [[V_NOT_B32_e32_]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY25]], [[V_NOT_B32_e32_1]], implicit $exec
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
- ; GFX940_ITERATIVE-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
- ; GFX940_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B2]]
+ ; GFX940_ITERATIVE-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX940_ITERATIVE-NEXT: [[COPY26:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B64_1]]
; GFX940_ITERATIVE-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U64_e64 [[REG_SEQUENCE5]], [[COPY26]], implicit $exec
; GFX940_ITERATIVE-NEXT: $vcc = COPY [[V_CMP_NE_U64_e64_]]
; GFX940_ITERATIVE-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
index 27d4698f8f6e8..567c9ca41ed19 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
@@ -142,13 +142,9 @@ body: |
; WAVE64-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
; WAVE64-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
; WAVE64-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
- ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
+ ; WAVE64-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967296, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -68719453481, implicit $exec
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[V_MOV_B6]], implicit [[V_MOV_B7]]
;
; WAVE32-LABEL: name: constant_v_s64
; WAVE32: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
@@ -157,13 +153,9 @@ body: |
; WAVE32-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
; WAVE32-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
; WAVE32-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
- ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
+ ; WAVE32-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967296, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -68719453481, implicit $exec
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[V_MOV_B6]], implicit [[V_MOV_B7]]
%0:vgpr(s64) = G_CONSTANT i64 0
%1:vgpr(s64) = G_CONSTANT i64 1
%2:vgpr(s64) = G_CONSTANT i64 -1
@@ -184,34 +176,26 @@ tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: constant_s_s64
- ; WAVE64: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
- ; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
- ; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
- ; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
- ; WAVE64-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
- ; WAVE64-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
- ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
- ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
- ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
- ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
+ ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; WAVE64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; WAVE64-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; WAVE64-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+ ; WAVE64-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
+ ; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+ ; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
+ ; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -68719453481
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_3]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
;
; WAVE32-LABEL: name: constant_s_s64
- ; WAVE32: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
- ; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
- ; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
- ; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
- ; WAVE32-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
- ; WAVE32-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
- ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
- ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
- ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
- ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
+ ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; WAVE32-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; WAVE32-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+ ; WAVE32-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
+ ; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+ ; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
+ ; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -68719453481
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_3]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
%0:sgpr(s64) = G_CONSTANT i64 0
%1:sgpr(s64) = G_CONSTANT i64 1
%2:sgpr(s64) = G_CONSTANT i64 -1
@@ -310,6 +294,195 @@ body: |
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
...
+---
+name: constant_s_p2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+
+ ; WAVE64-LABEL: name: constant_s_p2
+ ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
+ ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ;
+ ; WAVE32-LABEL: name: constant_s_p2
+ ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
+ ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ %0:sgpr(p2) = G_CONSTANT i32 0
+ %1:sgpr(p2) = G_CONSTANT i32 1
+ %2:sgpr(p2) = G_CONSTANT i32 -1
+ %3:sgpr(p2) = G_CONSTANT i32 -54
+ %4:sgpr(p2) = G_CONSTANT i32 27
+ S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
+...
+
+---
+name: constant_v_p2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; WAVE64-LABEL: name: constant_v_p2
+ ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
+ ;
+ ; WAVE32-LABEL: name: constant_v_p2
+ ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
+ %0:vgpr(p2) = G_CONSTANT i32 0
+ %1:vgpr(p2) = G_CONSTANT i32 1
+ %2:vgpr(p2) = G_CONSTANT i32 -1
+ %3:vgpr(p2) = G_CONSTANT i32 -54
+ %4:vgpr(p2) = G_CONSTANT i32 27
+ S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
+...
+
+---
+name: constant_s_p5
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+
+ ; WAVE64-LABEL: name: constant_s_p5
+ ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
+ ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ;
+ ; WAVE32-LABEL: name: constant_s_p5
+ ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
+ ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ %0:sgpr(p5) = G_CONSTANT i32 0
+ %1:sgpr(p5) = G_CONSTANT i32 1
+ %2:sgpr(p5) = G_CONSTANT i32 -1
+ %3:sgpr(p5) = G_CONSTANT i32 -54
+ %4:sgpr(p5) = G_CONSTANT i32 27
+ S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
+...
+
+---
+name: constant_v_p5
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; WAVE64-LABEL: name: constant_v_p5
+ ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
+ ;
+ ; WAVE32-LABEL: name: constant_v_p5
+ ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
+ %0:vgpr(p5) = G_CONSTANT i32 0
+ %1:vgpr(p5) = G_CONSTANT i32 1
+ %2:vgpr(p5) = G_CONSTANT i32 -1
+ %3:vgpr(p5) = G_CONSTANT i32 -54
+ %4:vgpr(p5) = G_CONSTANT i32 27
+ S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
+...
+
+---
+name: constant_s_p6
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+
+ ; WAVE64-LABEL: name: constant_s_p6
+ ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
+ ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ;
+ ; WAVE32-LABEL: name: constant_s_p6
+ ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
+ ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ %0:sgpr(p6) = G_CONSTANT i32 0
+ %1:sgpr(p6) = G_CONSTANT i32 1
+ %2:sgpr(p6) = G_CONSTANT i32 -1
+ %3:sgpr(p6) = G_CONSTANT i32 -54
+ %4:sgpr(p6) = G_CONSTANT i32 27
+ S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
+...
+
+---
+name: constant_v_p6
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; WAVE64-LABEL: name: constant_v_p6
+ ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
+ ;
+ ; WAVE32-LABEL: name: constant_v_p6
+ ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
+ %0:vgpr(p6) = G_CONSTANT i32 0
+ %1:vgpr(p6) = G_CONSTANT i32 1
+ %2:vgpr(p6) = G_CONSTANT i32 -1
+ %3:vgpr(p6) = G_CONSTANT i32 -54
+ %4:vgpr(p6) = G_CONSTANT i32 27
+ S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4
+...
+
---
name: constant_s_p1
legalized: true
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir
index 942a0a3bd5ff7..13e29f15504be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir
@@ -87,13 +87,13 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: fconstant_s_s64
- ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4607182418800017408
- ; GCN-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4620693217682128896
- ; GCN-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4611686018427387904
- ; GCN-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4601552919265804288
- ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B]]
- ; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B1]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4607182418800017408
+ ; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4620693217682128896
+ ; GCN-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -4611686018427387904
+ ; GCN-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4601552919265804288
+ ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B64_]]
+ ; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B1]]
%0:sgpr(s64) = G_FCONSTANT double 1.0
%1:sgpr(s64) = G_FCONSTANT double 8.0
%2:sgpr(s64) = G_FCONSTANT double -2.0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index 02c6220ac2aca..ada80da490fc5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -1374,11 +1374,11 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@@ -1389,11 +1389,11 @@ body: |
; GFX7: liveins: $sgpr0_sgpr1
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@@ -1404,11 +1404,11 @@ body: |
; GFX8: liveins: $sgpr0_sgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@@ -1419,11 +1419,11 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
@@ -1434,11 +1434,11 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
index 85ac53387f4d9..cf4e6c8b85e3e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
@@ -741,17 +741,15 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -759,17 +757,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -777,17 +773,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -795,17 +789,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
@@ -831,35 +823,31 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
;
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
- ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -867,17 +855,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
- ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -885,17 +871,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
- ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294971390
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
@@ -921,17 +905,15 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -939,17 +921,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -957,17 +937,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -975,17 +953,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967295
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
@@ -1010,17 +986,15 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -1028,17 +1002,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -1046,17 +1018,15 @@ body: |
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
;
@@ -1064,17 +1034,15 @@ body: |
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX12-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX12-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX12-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
index 4309f4863bf91..6f971788727b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -45,9 +45,7 @@ regBankSelected: true
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
# Max immediate for CI
-# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
-# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
-# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869180
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
@@ -59,9 +57,7 @@ regBankSelected: true
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
# Immediate overflow for CI
-# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
-# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# GCN: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 17179869184
# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
@@ -77,9 +73,7 @@ regBankSelected: true
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
# Overflow 32-bit byte offset
-# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# SIVI: [[K:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
index 2cfbb68b8affb..2a3d97d603b13 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
@@ -314,8 +314,8 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 0
@@ -337,10 +337,8 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1085102592571150096
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096
@@ -362,9 +360,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
- ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
@@ -387,9 +383,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -4294967296
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@@ -416,9 +410,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967296
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
@@ -441,7 +433,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -2
+ ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@@ -468,7 +460,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -4
+ ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@@ -495,7 +487,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -8
+ ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@@ -522,7 +514,7 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -16
+ ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
@@ -743,17 +735,15 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec
- ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1085102592571150096, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec
- ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -1085102592571150096
%2:vgpr(p0) = G_PTRMASK %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index d1fa579ca9b4b..614f59c564df6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -906,28 +906,26 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_mov_b32 s16, 0xb36211c7
; GFX11-NEXT: s_mov_b32 s6, 2.0
-; GFX11-NEXT: s_movk_i32 s17, 0x102
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mov_b32 v9, s16 :: v_dual_lshlrev_b32 v2, 2, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x34
; GFX11-NEXT: s_mov_b32 s8, 0x40400000
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v9, 0xb36211c7 :: v_dual_lshlrev_b32 v2, 2, v0
; GFX11-NEXT: s_mov_b32 s12, 0x40c00000
; GFX11-NEXT: s_mov_b32 s10, 0x40a00000
; GFX11-NEXT: s_mov_b32 s9, 4.0
; GFX11-NEXT: s_mov_b32 s14, 0x41000000
; GFX11-NEXT: s_mov_b32 s13, 0x40e00000
-; GFX11-NEXT: v_dual_mov_b32 v10, s17 :: v_dual_mov_b32 v3, s8
; GFX11-NEXT: v_mov_b32_e32 v6, s12
-; GFX11-NEXT: v_mov_b32_e32 v4, s9
-; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v5, s10
+; GFX11-NEXT: v_bfrev_b32_e32 v10, 4.0
+; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v3, s8
+; GFX11-NEXT: v_dual_mov_b32 v4, s9 :: v_dual_mov_b32 v7, s13
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: v_mov_b32_e32 v1, s5
; GFX11-NEXT: s_mov_b32 s4, 0
; GFX11-NEXT: s_mov_b32 s5, 1.0
-; GFX11-NEXT: v_mov_b32_e32 v7, s13
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT: flat_load_b32 v11, v[0:1]
@@ -1012,17 +1010,16 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_mov_b32 s12, 0xb36211c6
; GFX11-NEXT: s_mov_b32 s6, 2.0
-; GFX11-NEXT: s_movk_i32 s13, 0x102
; GFX11-NEXT: s_mov_b32 s8, 0x42004600
-; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX11-NEXT: s_mov_b32 s9, 0x44004700
; GFX11-NEXT: s_mov_b32 s10, 0x45004800
-; GFX11-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v3, s8
-; GFX11-NEXT: v_mov_b32_e32 v7, s13
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x34
-; GFX11-NEXT: v_dual_mov_b32 v4, s9 :: v_dual_mov_b32 v5, s10
+; GFX11-NEXT: v_mov_b32_e32 v6, 0xb36211c6
+; GFX11-NEXT: v_bfrev_b32_e32 v7, 4.0
+; GFX11-NEXT: v_mov_b32_e32 v3, s8
+; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: s_mov_b32 s4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
index 4d012796693cb..79164037bc6aa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
@@ -326,7 +326,6 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
; GCN-NEXT: v_pk_mov_b32 v[0:1], s[10:11], s[10:11] op_sel:[0,1]
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
-; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
@@ -334,6 +333,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
; GCN-NEXT: v_accvgpr_write_b32 a5, s5
; GCN-NEXT: v_accvgpr_write_b32 a6, s6
; GCN-NEXT: v_accvgpr_write_b32 a7, s7
+; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
; GCN-NEXT: v_mov_b32_e32 v0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
index 3def36766fbe0..9ef54ed724ec0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
@@ -77,11 +77,12 @@ define double @v_rsq_clamp_f64(double %src) #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
-; VI-NEXT: s_mov_b32 s4, -1
-; VI-NEXT: s_mov_b32 s5, 0x7fefffff
-; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
-; VI-NEXT: s_mov_b32 s5, 0xffefffff
-; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f64:
@@ -92,13 +93,14 @@ define double @v_rsq_clamp_f64(double %src) #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
-; GFX12-NEXT: s_mov_b32 s0, -1
-; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
-; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
-; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
ret double %rsq_clamp
@@ -115,11 +117,12 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
-; VI-NEXT: s_mov_b32 s4, -1
-; VI-NEXT: s_mov_b32 s5, 0x7fefffff
-; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
-; VI-NEXT: s_mov_b32 s5, 0xffefffff
-; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_fabs_f64:
@@ -130,13 +133,14 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
-; GFX12-NEXT: s_mov_b32 s0, -1
-; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
-; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
-; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fabs.src = call double @llvm.fabs.f64(double %src)
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
@@ -185,11 +189,12 @@ define double @v_rsq_clamp_undef_f64() #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5]
-; VI-NEXT: s_mov_b32 s4, -1
-; VI-NEXT: s_mov_b32 s5, 0x7fefffff
-; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
-; VI-NEXT: s_mov_b32 s5, 0xffefffff
-; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_undef_f64:
@@ -200,13 +205,14 @@ define double @v_rsq_clamp_undef_f64() #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
-; GFX12-NEXT: s_mov_b32 s0, -1
-; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
-; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
-; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
ret double %rsq_clamp
@@ -254,11 +260,12 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
-; VI-NEXT: s_mov_b32 s4, -1
-; VI-NEXT: s_mov_b32 s5, 0x7fefffff
-; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
-; VI-NEXT: s_mov_b32 s5, 0xffefffff
-; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; VI-NEXT: v_mov_b32_e32 v2, -1
+; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_rsq_clamp_f64_non_ieee:
@@ -269,13 +276,14 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
-; GFX12-NEXT: s_mov_b32 s0, -1
-; GFX12-NEXT: s_mov_b32 s1, 0x7fefffff
-; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_min_num_f64_e32 v[0:1], s[0:1], v[0:1]
-; GFX12-NEXT: s_mov_b32 s1, 0xffefffff
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_max_num_f64_e32 v[0:1], s[0:1], v[0:1]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT: v_mov_b32_e32 v2, -1
+; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
ret double %rsq_clamp
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index 5074f8814546e..8f88aaedf7e95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -117,10 +117,8 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
; GCN-LABEL: set_inactive_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
-; GCN-NEXT: s_mov_b32 s4, 0xcccccccd
-; GCN-NEXT: s_mov_b32 s5, 0x4010cccc
-; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_mov_b32_e32 v3, s5
+; GCN-NEXT: v_mov_b32_e32 v2, 0xcccccccd
+; GCN-NEXT: v_mov_b32_e32 v3, 0x4010cccc
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
index 3edd2e0914a6e..7cd3babc70909 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
@@ -7,12 +7,11 @@ declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1)
define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) {
; LOOP-LABEL: memset_p1i8:
; LOOP: ; %bb.0: ; %loadstoreloop.preheader
-; LOOP-NEXT: s_mov_b64 s[4:5], 0
+; LOOP-NEXT: s_mov_b64 s[0:1], 0
; LOOP-NEXT: s_mov_b32 s2, 0
; LOOP-NEXT: s_mov_b32 s3, 0xf000
-; LOOP-NEXT: s_mov_b64 s[0:1], 0
-; LOOP-NEXT: v_mov_b32_e32 v3, s4
-; LOOP-NEXT: v_mov_b32_e32 v4, s5
+; LOOP-NEXT: v_mov_b32_e32 v4, s1
+; LOOP-NEXT: v_mov_b32_e32 v3, s0
; LOOP-NEXT: .LBB0_1: ; %loadstoreloop
; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1
; LOOP-NEXT: v_add_i32_e32 v5, vcc, v0, v3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
index 48217b9bb0d93..4c34209752c01 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
@@ -76,29 +76,25 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(ptr addrspace(1) inreg %p
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s4, 0
-; GFX6-NEXT: s_mov_b32 s5, 4
-; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s4
-; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s4, 0
-; GFX7-NEXT: s_mov_b32 s5, 4
-; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s4
-; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
@@ -120,29 +116,25 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) in
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s4, 4
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, 4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s4, 4
-; GFX7-NEXT: s_mov_b32 s5, s4
-; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, 4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
@@ -234,9 +226,9 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
@@ -244,21 +236,16 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
+; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
; GFX12-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_mov_b32 s0, 0
-; GFX12-NEXT: s_mov_b32 s1, 4
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
@@ -273,7 +260,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 4
-; GFX6-NEXT: s_mov_b32 s1, s0
+; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
@@ -283,7 +270,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 4
-; GFX7-NEXT: s_mov_b32 s1, s0
+; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
@@ -292,13 +279,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
;
; GFX12-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_mov_b32 s0, 4
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: s_mov_b32 s1, s0
-; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 4
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
@@ -715,28 +697,24 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(ptr addrspace(1) inreg %p
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s4, 0
-; GFX6-NEXT: s_mov_b32 s5, 4
-; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s4
-; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s4, 0
-; GFX7-NEXT: s_mov_b32 s5, 4
-; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s4
-; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
@@ -758,28 +736,24 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) in
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s4, 4
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, 4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s4, 4
-; GFX7-NEXT: s_mov_b32 s5, s4
-; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, 4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
@@ -868,8 +842,8 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
@@ -878,21 +852,16 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %p
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
+; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_mov_b32 s0, 0
-; GFX12-NEXT: s_mov_b32 s1, 4
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: global_load_b32 v0, v[0:1], off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
@@ -905,7 +874,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 4
-; GFX6-NEXT: s_mov_b32 s1, s0
+; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
@@ -915,7 +884,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 4
-; GFX7-NEXT: s_mov_b32 s1, s0
+; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
@@ -924,13 +893,8 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %p
;
; GFX12-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_mov_b32 s0, 4
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: s_mov_b32 s1, s0
-; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 4
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: global_load_b32 v0, v[0:1], off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
@@ -1307,15 +1271,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inr
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) {
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s4, 0
-; GFX6-NEXT: s_mov_b32 s5, 4
-; GFX6-NEXT: v_mov_b32_e32 v1, s4
+; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v0, 2
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s4
-; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@@ -1324,15 +1286,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(
;
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s4, 0
-; GFX7-NEXT: s_mov_b32 s5, 4
-; GFX7-NEXT: v_mov_b32_e32 v1, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v0, 2
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s4
-; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@@ -1404,8 +1364,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: v_mov_b32_e32 v2, 2
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@@ -1418,8 +1378,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: v_mov_b32_e32 v2, 2
+; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@@ -1428,13 +1388,8 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(
;
; GFX12-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_mov_b32 s0, 0
-; GFX12-NEXT: s_mov_b32 s1, 4
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: v_mov_b32_e32 v2, 2
; GFX12-NEXT: global_wb scope:SCOPE_DEV
; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
@@ -1549,15 +1504,13 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg
define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) {
; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s4, 0
-; GFX6-NEXT: s_mov_b32 s5, 4
-; GFX6-NEXT: v_mov_b32_e32 v3, s4
+; GFX6-NEXT: v_mov_b32_e32 v3, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v2, v0
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v4, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s4
-; GFX6-NEXT: v_mov_b32_e32 v4, s5
; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@@ -1567,15 +1520,13 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1)
;
; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s4, 0
-; GFX7-NEXT: s_mov_b32 s5, 4
-; GFX7-NEXT: v_mov_b32_e32 v3, s4
+; GFX7-NEXT: v_mov_b32_e32 v3, 0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v2, v0
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v4, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s4
-; GFX7-NEXT: v_mov_b32_e32 v4, s5
; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@@ -1649,8 +1600,8 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: v_mov_b32_e32 v4, v2
; GFX6-NEXT: s_mov_b32 s1, 4
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
@@ -1663,8 +1614,8 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: v_mov_b32_e32 v4, v2
; GFX7-NEXT: s_mov_b32 s1, 4
+; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
@@ -1673,13 +1624,9 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1)
;
; GFX12-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_mov_b32 s0, 0
-; GFX12-NEXT: s_mov_b32 s1, 4
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, 0
; GFX12-NEXT: v_mov_b32_e32 v4, v2
-; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v5, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v5
-; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v6, vcc_lo
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 4, v1, vcc_lo
; GFX12-NEXT: global_wb scope:SCOPE_DEV
; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 377fa24cb4755..404e726246f4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -218,6 +218,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT: s_subb_u32 s5, 0, s11
+; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9]
; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
@@ -326,10 +327,9 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9]
-; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0
-; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
+; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 83ebc84e1f84a..5b94e71ecf52e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -193,7 +193,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0
-; CHECK-NEXT: s_mov_b32 s0, 1
+; CHECK-NEXT: s_mov_b32 s7, 1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
@@ -212,6 +212,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT: s_subb_u32 s5, 0, s9
+; CHECK-NEXT: s_mov_b32 s7, 0
; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
@@ -272,43 +273,43 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, s11, v0
; CHECK-NEXT: v_mul_lo_u32 v3, s10, v1
-; CHECK-NEXT: v_mul_hi_u32 v4, s10, v0
+; CHECK-NEXT: v_mul_hi_u32 v5, s10, v0
; CHECK-NEXT: v_mul_hi_u32 v0, s11, v0
-; CHECK-NEXT: v_mul_hi_u32 v5, s11, v1
+; CHECK-NEXT: v_mul_hi_u32 v6, s11, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v4, s11, v1
+; CHECK-NEXT: v_mul_lo_u32 v5, s11, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, s10, v1
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v0, v2
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2]
-; CHECK-NEXT: v_mov_b32_e32 v5, s11
+; CHECK-NEXT: v_mov_b32_e32 v3, s11
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s10, v0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
-; CHECK-NEXT: v_mov_b32_e32 v3, s9
-; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
+; CHECK-NEXT: v_mov_b32_e32 v4, s9
+; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v3, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s11, v1
-; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2
-; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1]
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0
-; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2
+; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[0:1]
+; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0
+; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v1
-; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v5, s[0:1]
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v3
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
@@ -321,12 +322,11 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
-; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
-; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_xor_b32 s0, s7, 1
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index d15551365707b..e31d8e95bd608 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -191,7 +191,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
-; CHECK-NEXT: s_mov_b32 s4, 1
+; CHECK-NEXT: s_mov_b32 s6, 1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
@@ -199,6 +199,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
; CHECK-NEXT: s_sub_u32 s4, 0, s2
; CHECK-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
; CHECK-NEXT: s_subb_u32 s5, 0, s3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
@@ -317,12 +318,11 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
-; CHECK-NEXT: s_xor_b32 s1, s4, 1
+; CHECK-NEXT: s_xor_b32 s1, s6, 1
; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index cc0f7e2ca5a54..f30b278b3e611 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -188,13 +188,14 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
-; CHECK-NEXT: s_mov_b32 s4, 1
+; CHECK-NEXT: s_mov_b32 s6, 1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_mov_b32_e32 v0, s3
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
; CHECK-NEXT: s_sub_u32 s4, 0, s2
+; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: v_mov_b32_e32 v3, s1
; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
; CHECK-NEXT: s_subb_u32 s5, 0, s3
@@ -313,12 +314,11 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
-; CHECK-NEXT: s_xor_b32 s1, s4, 1
+; CHECK-NEXT: s_xor_b32 s1, s6, 1
; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index eeddc2211ea97..4215ae43345fd 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -310,8 +310,8 @@ ret:
; GFX11-LABEL: tied_operand_test:
; GFX11: ; %bb.0: ; %entry
-; GFX11-DAG: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off
-; GFX11-DAG: v_mov_b32_e32 [[C:v[0-9]+]], 0x7b
+; GFX11: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off
+; GFX11: v_dual_mov_b32 [[C:v[0-9]+]], 0x7b :: v_dual_mov_b32 v{{[0-9]+}}, s{{[0-9]+}}
; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[LDRESULT]] offset:10
; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[C]] offset:8
; GFX11-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index b62bf890e65fe..247ec407df5fd 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -974,19 +974,19 @@ define amdgpu_kernel void @bit4_inselt(ptr addrspace(1) %out, <4 x i1> %vec, i32
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_and_b32 s3, s3, 3
-; GCN-NEXT: v_mov_b32_e32 v1, s2
; GCN-NEXT: v_lshrrev_b16_e64 v2, 1, s2
-; GCN-NEXT: v_lshrrev_b16_e64 v3, 2, s2
-; GCN-NEXT: v_lshrrev_b16_e64 v4, 3, s2
+; GCN-NEXT: v_mov_b32_e32 v3, s2
+; GCN-NEXT: v_lshrrev_b16_e64 v4, 2, s2
+; GCN-NEXT: v_lshrrev_b16_e64 v5, 3, s2
+; GCN-NEXT: v_mov_b32_e32 v1, 1
; GCN-NEXT: v_or_b32_e32 v0, s3, v0
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
-; GCN-NEXT: v_and_b32_e32 v3, 3, v3
-; GCN-NEXT: v_and_b32_e32 v4, 1, v4
-; GCN-NEXT: buffer_store_byte v1, off, s[12:15], 0
-; GCN-NEXT: buffer_store_byte v4, off, s[12:15], 0 offset:3
-; GCN-NEXT: buffer_store_byte v3, off, s[12:15], 0 offset:2
+; GCN-NEXT: v_and_b32_e32 v4, 3, v4
+; GCN-NEXT: v_and_b32_e32 v5, 1, v5
+; GCN-NEXT: buffer_store_byte v3, off, s[12:15], 0
+; GCN-NEXT: buffer_store_byte v5, off, s[12:15], 0 offset:3
+; GCN-NEXT: buffer_store_byte v4, off, s[12:15], 0 offset:2
; GCN-NEXT: buffer_store_byte v2, off, s[12:15], 0 offset:1
-; GCN-NEXT: v_mov_b32_e32 v1, 1
; GCN-NEXT: buffer_store_byte v1, v0, s[12:15], 0 offen
; GCN-NEXT: buffer_load_ubyte v0, off, s[12:15], 0
; GCN-NEXT: buffer_load_ubyte v1, off, s[12:15], 0 offset:1
diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
index 266ab687cd8d5..d51ace630f692 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
@@ -94,14 +94,14 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 2
+; CHECK-NEXT: v_mov_b32_e32 v0, 2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 4
; CHECK-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b32 v2, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_normal
@@ -134,14 +134,14 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 4
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
-; CHECK-NEXT: ds_write_b32 v3, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
+; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@@ -157,14 +157,14 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 2
+; CHECK-NEXT: v_mov_b32_e32 v0, 2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 4
; CHECK-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b32 v2, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_overalign
@@ -197,14 +197,14 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
-; CHECK-NEXT: ds_write_b32 v3, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
+; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@@ -220,14 +220,14 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 2
+; CHECK-NEXT: v_mov_b32_e32 v0, 2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 8
; CHECK-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b32 v2, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_normal
@@ -260,14 +260,14 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
-; CHECK-NEXT: ds_write_b32 v3, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
+; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@@ -283,14 +283,14 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx)
; CHECK-LABEL: module_0_kernel_overalign_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 2
+; CHECK-NEXT: v_mov_b32_e32 v0, 2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
; CHECK-NEXT: s_add_i32 s0, s0, 8
; CHECK-NEXT: v_mov_b32_e32 v2, s0
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b32 v2, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: s_endpgm
store i16 2, ptr addrspace(3) @kernel_overalign
@@ -323,14 +323,14 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx)
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_lshl_b32 s4, s15, 2
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_add_i32 s4, s4, 8
; CHECK-NEXT: v_mov_b32_e32 v2, 2
; CHECK-NEXT: v_mov_b32_e32 v3, s4
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
-; CHECK-NEXT: ds_write_b32 v3, v0
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
+; CHECK-NEXT: ds_write_b32 v3, v1
; CHECK-NEXT: s_endpgm
call void @use_module()
store i16 1, ptr addrspace(3) @module_variable
@@ -368,11 +368,11 @@ define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %id
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; CHECK-NEXT: v_mov_b32_e32 v3, 0
-; CHECK-NEXT: v_mov_b32_e32 v4, 2
+; CHECK-NEXT: v_mov_b32_e32 v3, 2
+; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 0
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
-; CHECK-NEXT: ds_write_b16 v3, v4
+; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@@ -408,12 +408,12 @@ define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %id
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal at gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal at gotpcrel32@hi+12
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@@ -445,11 +445,11 @@ define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; CHECK-NEXT: v_mov_b32_e32 v3, 0
-; CHECK-NEXT: v_mov_b32_e32 v4, 2
+; CHECK-NEXT: v_mov_b32_e32 v3, 2
+; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 2
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
-; CHECK-NEXT: ds_write_b16 v3, v4
+; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@@ -485,12 +485,12 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal at gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal at gotpcrel32@hi+12
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@@ -522,11 +522,11 @@ define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; CHECK-NEXT: v_mov_b32_e32 v3, 0
-; CHECK-NEXT: v_mov_b32_e32 v4, 2
+; CHECK-NEXT: v_mov_b32_e32 v3, 2
+; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 1
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
-; CHECK-NEXT: ds_write_b16 v3, v4
+; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@@ -562,12 +562,12 @@ define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign at gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign at gotpcrel32@hi+12
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@@ -599,11 +599,11 @@ define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; CHECK-NEXT: v_mov_b32_e32 v3, 0
-; CHECK-NEXT: v_mov_b32_e32 v4, 2
+; CHECK-NEXT: v_mov_b32_e32 v3, 2
+; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s15, 3
; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
-; CHECK-NEXT: ds_write_b16 v3, v4
+; CHECK-NEXT: ds_write_b16 v4, v3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
@@ -639,12 +639,12 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign at gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign at gotpcrel32@hi+12
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 2
-; CHECK-NEXT: ds_write_b16 v0, v1
-; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
+; CHECK-NEXT: ds_write_b16 v1, v0
+; CHECK-NEXT: ds_write_b16 v1, v2 offset:4
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
index 7998d430d5f90..0fb9e2572446b 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
@@ -19,18 +19,18 @@ $_f2 = comdat any
define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
; GCN-LABEL: test:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: v_mov_b32_e32 v1, 2
-; GCN-NEXT: ds_write_b8 v0, v1
-; GCN-NEXT: ds_read_u8 v2, v0 offset:2
-; GCN-NEXT: ds_read_u16 v3, v0
+; GCN-NEXT: v_mov_b32_e32 v0, 2
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: ds_write_b8 v1, v0
+; GCN-NEXT: ds_read_u8 v2, v1 offset:2
+; GCN-NEXT: ds_read_u16 v3, v1
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: ds_write_b8 v0, v2 offset:6
-; GCN-NEXT: ds_write_b16 v0, v3 offset:4
-; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD
-; GCN-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
-; GCN-NEXT: global_store_byte v0, v1, s[0:1]
+; GCN-NEXT: ds_write_b8 v1, v2 offset:6
+; GCN-NEXT: ds_write_b16 v1, v3 offset:4
+; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v0 src0_sel:BYTE_0 src1_sel:DWORD
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
+; GCN-NEXT: global_store_byte v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
; CHECK-LABEL: define protected amdgpu_kernel void @test(
; CHECK-SAME: ptr addrspace(1) nocapture [[PTR_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
@@ -47,7 +47,6 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce
; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP2]] to i8
; CHECK-NEXT: store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE]], align 1
; CHECK-NEXT: ret void
-;
entry:
store i8 3, ptr addrspace(3) @_f1, align 1
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
diff --git a/llvm/test/CodeGen/AMDGPU/mmra.ll b/llvm/test/CodeGen/AMDGPU/mmra.ll
index d9b48f79739b6..0167fcbc4ab7c 100644
--- a/llvm/test/CodeGen/AMDGPU/mmra.ll
+++ b/llvm/test/CodeGen/AMDGPU/mmra.ll
@@ -17,10 +17,9 @@ define void @fence_loads(ptr %ptr) {
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4)
- ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1, mmra !2
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, mmra !2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], mmra !2
- ; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[COPY4]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
+ ; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
; CHECK-NEXT: SI_RETURN
fence release, !mmra !0
%ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
index 5c09d2bd61a39..31e8a49b16ca1 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
@@ -858,14 +858,14 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX11-SDAG: ; %bb.0:
@@ -892,39 +892,17 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -936,13 +914,8 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -962,14 +935,14 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX11-SDAG: ; %bb.0:
@@ -996,39 +969,17 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1040,13 +991,8 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1066,14 +1012,14 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX11-SDAG: ; %bb.0:
@@ -1100,39 +1046,17 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1144,13 +1068,8 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1161,32 +1080,32 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
; Fill 12-bit low-bits (1ull << 33) | 4096
define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) {
-; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
-; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
+; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX12-SDAG: ; %bb.0:
@@ -1201,46 +1120,6 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) {
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1248,13 +1127,8 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1274,14 +1148,14 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX11-SDAG: ; %bb.0:
@@ -1308,39 +1182,17 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1352,13 +1204,8 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1369,32 +1216,32 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
; Fill 13-bit low-bits (1ull << 33) | 8192
define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) {
-; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
-; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
+; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX12-SDAG: ; %bb.0:
@@ -1409,46 +1256,6 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) {
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1456,13 +1263,8 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1483,23 +1285,23 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX12-SDAG: ; %bb.0:
@@ -1517,43 +1319,13 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1561,13 +1333,8 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1588,23 +1355,23 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
@@ -1622,43 +1389,13 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1666,13 +1403,8 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1693,23 +1425,23 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX12-SDAG: ; %bb.0:
@@ -1727,43 +1459,13 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1771,13 +1473,8 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1798,23 +1495,23 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
@@ -1832,43 +1529,13 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1876,13 +1543,8 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1903,23 +1565,23 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX12-SDAG: ; %bb.0:
@@ -1937,43 +1599,13 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1981,13 +1613,8 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2008,23 +1635,23 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
@@ -2042,43 +1669,13 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) {
; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2086,13 +1683,8 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4187,3 +3779,6 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr
store i8 %load, ptr undef
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10-GISEL: {{.*}}
+; GFX10-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
index b5b8213bcd57e..548c196df1da5 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
@@ -924,12 +924,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -937,12 +933,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -950,13 +942,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -968,13 +955,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1028,39 +1010,26 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1072,13 +1041,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1092,15 +1056,6 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1132,12 +1087,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1145,12 +1096,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1158,13 +1105,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1176,13 +1118,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1233,45 +1170,32 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
; Fill 12-bit low-bits (1ull << 33) | 4096
define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
-; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
+; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX11-NEXT: global_load_u8 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX12-GISEL: ; %bb.0:
@@ -1280,44 +1204,12 @@ define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
-; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1340,12 +1232,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1353,12 +1241,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1366,13 +1250,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1384,13 +1263,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1441,45 +1315,32 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
; Fill 13-bit low-bits (1ull << 33) | 8192
define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
-; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
+; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
+; GFX11-NEXT: global_load_u8 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX12-GISEL: ; %bb.0:
@@ -1488,44 +1349,12 @@ define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX12-GISEL-NEXT: s_mov_b32 s1, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
-; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
-; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1548,12 +1377,9 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1561,12 +1387,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1574,13 +1396,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1592,13 +1409,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x7ff
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1653,39 +1465,27 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1697,13 +1497,8 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x800
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1718,15 +1513,6 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1)
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1758,12 +1544,9 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1771,12 +1554,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1784,13 +1563,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1802,13 +1576,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0xfff
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1863,42 +1632,30 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: global_load_u8 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
@@ -1907,13 +1664,8 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1000
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1928,24 +1680,6 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1)
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1968,12 +1702,9 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1981,12 +1712,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1994,13 +1721,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2012,13 +1734,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x1fff
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2073,42 +1790,30 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
-; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
+; GFX11-NEXT: global_load_u8 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
@@ -2117,13 +1822,8 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_movk_i32 s0, 0x2000
-; GFX12-GISEL-NEXT: s_brev_b32 s1, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
+; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2138,24 +1838,6 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1)
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
-; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll
index 528dcfc6bc038..0f95c0255d3ab 100644
--- a/llvm/test/CodeGen/AMDGPU/roundeven.ll
+++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll
@@ -1008,10 +1008,10 @@ define double @v_roundeven_f64(double %x) {
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
+; GFX6-NEXT: v_mov_b32_e32 v6, -1
+; GFX6-NEXT: v_mov_b32_e32 v7, 0x432fffff
; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
-; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -1087,17 +1087,17 @@ define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
-; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v6
+; GFX6-NEXT: v_xor_b32_e32 v8, 0x80000000, v1
+; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v8
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT: v_add_f64 v[4:5], -v[0:1], v[2:3]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
+; GFX6-NEXT: v_mov_b32_e32 v6, -1
+; GFX6-NEXT: v_mov_b32_e32 v7, 0x432fffff
; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
-; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll
index 6d243e4bddac5..47e4406762872 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll
@@ -1,10 +1,8 @@
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-OPT %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-NOOPT %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; GCN-LABEL: {{^}}scalar_to_vector_i16:
-; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 42
-; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]]
-; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 42
+; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 42
; GCN: buffer_store_short [[V]],
define void @scalar_to_vector_i16() {
%tmp = load <2 x i16>, ptr addrspace(5) undef
@@ -14,9 +12,7 @@ define void @scalar_to_vector_i16() {
}
; GCN-LABEL: {{^}}scalar_to_vector_f16:
-; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 0x3c00
-; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]]
-; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00
+; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00
; GCN: buffer_store_short [[V]],
define void @scalar_to_vector_f16() {
%tmp = load <2 x half>, ptr addrspace(5) undef
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
index 20dc5ad5c8665..50927a2cf21af 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -25,8 +25,7 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
; CHECK-NEXT: ; implicit-def: $sgpr4
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4
-; CHECK-NEXT: s_mov_b32 s4, 0
-; CHECK-NEXT: v_mov_b32_e32 v2, s4
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: ds_write_b8 v1, v2
; CHECK-NEXT: s_mov_b64 s[4:5], exec
; CHECK-NEXT: v_writelane_b32 v0, s4, 0
>From 4a7ffebdfee999458454daca757d27dd3eafa320 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 29 Jul 2024 22:02:04 +0400
Subject: [PATCH 2/2] Update llvm/lib/Target/AMDGPU/SIInstructions.td
Co-authored-by: Joe Nash <joseph.nash at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d2101654d2acb..969e83133c76c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2163,7 +2163,18 @@ def : GCNPat <
(S_MOV_B32 $ga)
>;
-def : GCNPat <
+foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
+ let True16Predicate = pred in
+ def : GCNPat <
+ (VGPRImm<(i16 imm)>:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+ >;
+}
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <
+ (VGPRImm<(i16 imm)>:$imm),
+ (V_MOV_B16_t16_e64 0, imm:$imm, 0)
+>;
(VGPRImm<(i16 imm)>:$imm),
(V_MOV_B32_e32 imm:$imm)
>;
More information about the llvm-commits
mailing list