[llvm] [TargetLowering] Add a new function `getNullPtrValue` (PR #126665)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 20:59:20 PST 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/126665
>From 7576732d3e2d3d8cca9203ea033a208872eaa5b6 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Mon, 10 Feb 2025 23:59:01 -0500
Subject: [PATCH] [TargetLowering] Add a new function `getNullPtrValue`
In most cases, `nullptr` is a zero-value constant with the corresponding pointer
type. However, this is not always the case. For example, AMDGPU uses
`0xffffffff` as the null pointer value for AS3 (local) and AS5 (private), so
lowering `ConstantPointerNull` in those address spaces to the constant 0
produces the wrong value. Currently, to ensure correct lowering,
`ptr addrspace(5) null` must be written as
`addrspacecast (ptr null to ptr addrspace(5))`.
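This PR introduces `TargetLowering::getNullPtrValue`, a virtual hook that
returns the correct value of `nullptr` for a given address space.
SelectionDAGBuilder now calls it when lowering `ConstantPointerNull`, whose
pointer type already carries the correct address space, and AMDGPU overrides it
to return `0xffffffff` for AS3 and AS5.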
Fixes #115083.
---
llvm/include/llvm/CodeGen/TargetLowering.h | 7 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 3 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 5 +
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 9 ++
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 +
.../AMDGPU/agpr-copy-no-free-registers.ll | 72 +++++----
...der-no-live-segment-at-def-implicit-def.ll | 11 +-
.../branch-folding-implicit-def-subreg.ll | 127 ++++++++-------
.../CodeGen/AMDGPU/cf-loop-on-constant.ll | 4 +-
llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll | 28 ++--
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 30 ++--
...cannot-create-empty-or-backward-segment.ll | 7 +-
.../hazard-recognizer-src-shared-base.ll | 11 +-
.../AMDGPU/llvm.amdgcn.set.inactive.ll | 4 +-
llvm/test/CodeGen/AMDGPU/load-hi16.ll | 16 +-
llvm/test/CodeGen/AMDGPU/load-lo16.ll | 14 +-
...p-var-out-of-divergent-loop-swdev407790.ll | 2 +-
llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll | 102 ++++++++++++
.../AMDGPU/promote-constOffset-to-imm.ll | 71 +++++----
.../AMDGPU/sdwa-peephole-instr-combine-sel.ll | 3 +-
.../test/CodeGen/AMDGPU/setcc-multiple-use.ll | 2 +-
.../CodeGen/AMDGPU/stacksave_stackrestore.ll | 10 +-
...-call-inreg-arguments.convergencetokens.ll | 2 +-
.../AMDGPU/tail-call-inreg-arguments.ll | 2 +-
...-in-vgprs-issue110930.convergencetokens.ll | 2 +-
...all-uniform-target-in-vgprs-issue110930.ll | 2 +-
.../AMDGPU/tuple-allocation-failure.ll | 22 +--
.../AMDGPU/unstructured-cfg-def-use-issue.ll | 149 +++++++++---------
llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll | 6 +-
.../CodeGen/AMDGPU/waterfall_kills_scc.ll | 13 +-
llvm/test/CodeGen/AMDGPU/wqm.ll | 18 ++-
31 files changed, 459 insertions(+), 298 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bbecc7a6ddaee79..6f7b64e663e84c3 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5659,6 +5659,13 @@ class TargetLowering : public TargetLoweringBase {
LoadSDNode *OriginalLoad,
SelectionDAG &DAG) const;
+ /// Return the value of a null pointer in address space \p AS. In most cases
+ /// this is a zero-value constant of the corresponding pointer type. However,
+ /// this is not always the case: for certain address spaces on some targets,
+ /// it could be a value like ~0U.
+ virtual SDValue getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
private:
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7178f6398bede50..adc1d531826e73e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1807,8 +1807,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
- return DAG.getConstant(0, getCurSDLoc(),
- TLI.getPointerTy(DAG.getDataLayout(), AS));
+ return TLI.getNullPtrValue(AS, getCurSDLoc(), DAG);
}
if (match(C, m_VScale()))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index adfb96041c5c06b..13220a8e9cf1294 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12188,3 +12188,8 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
return Load;
}
+
+SDValue TargetLowering::getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ return DAG.getConstant(0, DL, getPointerTy(DAG.getDataLayout(), AS));
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 792e17eeedab141..021f602a56ed78a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6050,3 +6050,12 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
}
+
+SDValue AMDGPUTargetLowering::getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::LOCAL_ADDRESS) {
+ return DAG.getConstant(0xffffffff, DL,
+ getPointerTy(DAG.getDataLayout(), AS));
+ }
+ return TargetLowering::getNullPtrValue(AS, DL, DAG);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c74dc7942f52c07..9e6b2eecb5c28b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -387,6 +387,9 @@ class AMDGPUTargetLowering : public TargetLowering {
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ SDValue getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const override;
};
namespace AMDGPUISD {
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 4ce46bbaf45ac14..9c4fb346a1fb7e1 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -521,11 +521,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s3
; GFX908-NEXT: s_sub_i32 s8, 0, s3
-; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s7
-; GFX908-NEXT: v_mov_b32_e32 v19, 0
+; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
+; GFX908-NEXT: v_mov_b32_e32 v17, 0
; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0
; GFX908-NEXT: v_mov_b32_e32 v0, 0
; GFX908-NEXT: v_mov_b32_e32 v1, 0
+; GFX908-NEXT: v_mov_b32_e32 v20, -1
; GFX908-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX908-NEXT: v_readfirstlane_b32 s10, v2
@@ -544,7 +545,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_cmp_ge_u32 s2, s3
; GFX908-NEXT: s_cselect_b32 s8, s10, s8
; GFX908-NEXT: s_lshr_b32 s7, s7, 16
-; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
+; GFX908-NEXT: v_cvt_f32_f16_e32 v19, s7
; GFX908-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
; GFX908-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
; GFX908-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
@@ -611,37 +612,37 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
; GFX908-NEXT: s_add_u32 s20, s18, s7
; GFX908-NEXT: s_addc_u32 s21, s19, s9
-; GFX908-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
+; GFX908-NEXT: global_load_dword v22, v17, s[20:21] offset:-12 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
+; GFX908-NEXT: global_load_dword v21, v17, s[20:21] offset:-8 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v12, v19, s[20:21] offset:-4 glc
+; GFX908-NEXT: global_load_dword v12, v17, s[20:21] offset:-4 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v12, v19, s[20:21] glc
+; GFX908-NEXT: global_load_dword v12, v17, s[20:21] glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: ds_read_b64 v[12:13], v19
+; GFX908-NEXT: ds_read_b64 v[12:13], v20
; GFX908-NEXT: ds_read_b64 v[14:15], v0
; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
; GFX908-NEXT: ; %bb.6: ; %bb51
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
-; GFX908-NEXT: v_cvt_f32_f16_sdwa v22, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX908-NEXT: v_cvt_f32_f16_sdwa v23, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX908-NEXT: v_cvt_f32_f16_e32 v22, v22
+; GFX908-NEXT: v_cvt_f32_f16_sdwa v24, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX908-NEXT: v_cvt_f32_f16_e32 v21, v21
-; GFX908-NEXT: v_cvt_f32_f16_sdwa v23, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX908-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX908-NEXT: v_add_f32_e32 v24, v17, v12
-; GFX908-NEXT: v_add_f32_e32 v25, v18, v13
-; GFX908-NEXT: v_add_f32_e32 v26, 0, v12
-; GFX908-NEXT: v_add_f32_e32 v27, 0, v13
-; GFX908-NEXT: v_add_f32_e32 v15, v22, v15
-; GFX908-NEXT: v_add_f32_e32 v14, v21, v14
-; GFX908-NEXT: v_add_f32_e32 v13, v23, v13
-; GFX908-NEXT: v_add_f32_e32 v12, v20, v12
-; GFX908-NEXT: v_add_f32_e32 v5, v5, v25
-; GFX908-NEXT: v_add_f32_e32 v4, v4, v24
-; GFX908-NEXT: v_add_f32_e32 v7, v7, v27
-; GFX908-NEXT: v_add_f32_e32 v6, v6, v26
+; GFX908-NEXT: v_add_f32_e32 v25, v18, v12
+; GFX908-NEXT: v_add_f32_e32 v26, v19, v13
+; GFX908-NEXT: v_add_f32_e32 v27, 0, v12
+; GFX908-NEXT: v_add_f32_e32 v28, 0, v13
+; GFX908-NEXT: v_add_f32_e32 v15, v23, v15
+; GFX908-NEXT: v_add_f32_e32 v14, v22, v14
+; GFX908-NEXT: v_add_f32_e32 v13, v24, v13
+; GFX908-NEXT: v_add_f32_e32 v12, v21, v12
+; GFX908-NEXT: v_add_f32_e32 v5, v5, v26
+; GFX908-NEXT: v_add_f32_e32 v4, v4, v25
+; GFX908-NEXT: v_add_f32_e32 v7, v7, v28
+; GFX908-NEXT: v_add_f32_e32 v6, v6, v27
; GFX908-NEXT: v_add_f32_e32 v8, v8, v14
; GFX908-NEXT: v_add_f32_e32 v9, v9, v15
; GFX908-NEXT: v_add_f32_e32 v10, v10, v12
@@ -686,6 +687,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3
; GFX90A-NEXT: s_sub_i32 s8, 0, s3
; GFX90A-NEXT: v_mov_b32_e32 v19, 0
+; GFX90A-NEXT: v_mov_b32_e32 v20, -1
; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
@@ -770,15 +772,15 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
; GFX90A-NEXT: s_add_u32 s20, s18, s7
; GFX90A-NEXT: s_addc_u32 s21, s19, s9
-; GFX90A-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
+; GFX90A-NEXT: global_load_dword v22, v19, s[20:21] offset:-12 glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
+; GFX90A-NEXT: global_load_dword v21, v19, s[20:21] offset:-8 glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] offset:-4 glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: ds_read_b64 v[14:15], v19
+; GFX90A-NEXT: ds_read_b64 v[14:15], v20
; GFX90A-NEXT: ds_read_b64 v[16:17], v0
; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21
@@ -786,16 +788,16 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
; GFX90A-NEXT: ; %bb.6: ; %bb51
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
-; GFX90A-NEXT: v_cvt_f32_f16_sdwa v23, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v21
-; GFX90A-NEXT: v_cvt_f32_f16_sdwa v21, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX90A-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[2:3], v[14:15]
-; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[14:15], 0 op_sel_hi:[1,0]
+; GFX90A-NEXT: v_cvt_f32_f16_sdwa v23, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v22
+; GFX90A-NEXT: v_cvt_f32_f16_sdwa v25, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX90A-NEXT: v_cvt_f32_f16_e32 v24, v21
+; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[2:3], v[14:15]
+; GFX90A-NEXT: v_pk_add_f32 v[28:29], v[14:15], 0 op_sel_hi:[1,0]
; GFX90A-NEXT: v_pk_add_f32 v[16:17], v[22:23], v[16:17]
-; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[20:21], v[14:15]
-; GFX90A-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[24:25]
-; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[26:27]
+; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[24:25], v[14:15]
+; GFX90A-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[26:27]
+; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[28:29]
; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17]
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
; GFX90A-NEXT: s_mov_b64 s[20:21], -1
diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index f9ffa5ae57f3ed9..cd38381d3520df2 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -73,11 +73,12 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s38, s36
; CHECK-NEXT: s_mov_b32 s39, s36
; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; CHECK-NEXT: v_mov_b32_e32 v0, -1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:11
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:7
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:3
; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
; CHECK-NEXT: v_mov_b32_e32 v0, s36
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 98136347ab702ca..6de9fc050c821c8 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -25,8 +25,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr33, 8, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_XOR_B64 killed renamable $sgpr18_sgpr19, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = DS_READ_B32_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3)
; GFX90A-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr2 = DS_READ_B32_gfx9 renamable $vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3)
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr28_sgpr29, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
@@ -143,8 +144,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.10(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr2 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr8, killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.10.Flow33:
@@ -160,8 +162,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.12(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr2 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr6, killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.12.Flow34:
@@ -177,8 +180,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.14(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr2 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr4, killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.14.Flow35:
@@ -217,8 +221,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.18(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr46, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.18.Flow37:
@@ -234,8 +239,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.20(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr62, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.20.Flow38:
@@ -251,8 +257,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.22(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr60, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.22.Flow39:
@@ -268,8 +275,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.24(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr58, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.24.Flow40:
@@ -285,8 +293,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.26(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr56, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.26.Flow41:
@@ -302,8 +311,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.28(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr44, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.28.Flow42:
@@ -327,8 +337,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.31(0x80000000)
; GFX90A-NEXT: liveins: $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr40, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.31.Flow44:
@@ -353,8 +364,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.29(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN renamable $vgpr42, killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.29
; GFX90A-NEXT: {{ $}}
@@ -766,7 +778,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr56_sgpr57:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr53 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 -1, implicit $exec
; GFX90A-NEXT: renamable $vgpr14_vgpr15 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr21, implicit $exec
; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
@@ -823,7 +835,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.3(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.434, addrspace 3)
@@ -933,21 +945,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr17, 0, $vgpr3, 0, 0, 6, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr15, implicit $exec
- ; GFX90A-NEXT: DS_WRITE2_B32_gfx9 killed renamable $vgpr3, killed renamable $vgpr2, renamable $vgpr3, 0, 1, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, align 4, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: DS_WRITE2_B32_gfx9 killed renamable $vgpr10, killed renamable $vgpr2, killed renamable $vgpr3, 0, 1, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, align 4, addrspace 3)
; GFX90A-NEXT: S_BRANCH %bb.65
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.68.bb174:
; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr26 = V_OR_B32_e32 1, $vgpr24, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr26, $vgpr22, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 $vgpr48, $vgpr20, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr28 = V_CNDMASK_B32_e64 0, $vgpr34, 0, 0, $sgpr12_sgpr13, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr28, $vgpr18, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr38, $vgpr10, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr32 = V_OR_B32_e32 $vgpr36, $vgpr12, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr50 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr32, killed $sgpr12_sgpr13, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr50 = V_OR_B32_e32 1, $vgpr24, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr50, $vgpr22, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr32 = V_OR_B32_e32 $vgpr38, $vgpr20, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr26 = V_CNDMASK_B32_e64 0, $vgpr32, 0, 0, $sgpr12_sgpr13, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr26, $vgpr18, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 $vgpr36, $vgpr10, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr28 = V_OR_B32_e32 $vgpr34, $vgpr12, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr48 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr28, killed $sgpr12_sgpr13, implicit $exec
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr28_sgpr29, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.72, implicit $vcc
@@ -967,26 +980,27 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr27, implicit $exec
; GFX90A-NEXT: renamable $vgpr2, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr2, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr3, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr10, killed $vgpr3, killed $vcc, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr27 = V_MOV_B32_e32 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr49 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr35 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr39 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr37 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr29 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr51 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr33 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, renamable $vgpr26_vgpr27, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr21, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr12 = COPY killed renamable $sgpr22, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr12, killed renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into %ir.8, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, killed renamable $vgpr28_vgpr29, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr10, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr27, killed renamable $vgpr32_vgpr33, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: renamable $vgpr51 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr39 = COPY renamable $vgpr51, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr33 = COPY renamable $vgpr51, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr37 = COPY renamable $vgpr51, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr35 = COPY renamable $vgpr51, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr27 = COPY renamable $vgpr51, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr49 = COPY renamable $vgpr51, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr29 = COPY renamable $vgpr51, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr12 = COPY renamable $sgpr21, implicit $exec
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr12, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr14 = COPY killed renamable $sgpr22, implicit $exec
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr14, killed renamable $vgpr32_vgpr33, 0, 0, implicit $exec :: (store (s64) into %ir.8, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr12, killed renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr26_vgpr27, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr12, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr28_vgpr29, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr2, killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 3, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.71.Flow9:
; GFX90A-NEXT: successors: %bb.63(0x80000000)
@@ -999,10 +1013,11 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.69(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr50, killed $vgpr16, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr48, killed $vgpr16, implicit $exec
; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr10, killed $vgpr14, implicit $exec
; GFX90A-NEXT: renamable $vgpr55 = V_MOV_B32_e32 0, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr55, renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr10, killed renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 0
; GFX90A-NEXT: S_BRANCH %bb.69
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index fa4e82da1d18e7d..004261768ede749 100644
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -365,7 +365,7 @@ for.body:
define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN-LABEL: loop_arg_0:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v0, -1
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_read_u8 v0, v0
; GCN-NEXT: s_load_dword s4, s[4:5], 0x9
@@ -401,7 +401,7 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0
-; GCN_DBG-NEXT: v_mov_b32_e32 v0, 0
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, -1
; GCN_DBG-NEXT: s_mov_b32 m0, -1
; GCN_DBG-NEXT: ds_read_u8 v0, v0
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 21a2ae80574e0fc..90ef3737e4aa8d5 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -11,8 +11,8 @@ define <2 x half> @chain_hi_to_lo_private() {
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], 0
+; GFX900-NEXT: v_mov_b32_e32 v1, -1
+; GFX900-NEXT: buffer_load_short_d16_hi v0, v1, s[0:3], 0 offen
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -21,7 +21,7 @@ define <2 x half> @chain_hi_to_lo_private() {
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, 2
; FLATSCR-NEXT: scratch_load_ushort v0, off, s0
-; FLATSCR-NEXT: s_mov_b32 s0, 0
+; FLATSCR-NEXT: s_mov_b32 s0, -1
; FLATSCR-NEXT: scratch_load_short_d16_hi v0, off, s0
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
@@ -29,9 +29,9 @@ define <2 x half> @chain_hi_to_lo_private() {
; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private:
; GFX10_DEFAULT: ; %bb.0: ; %bb
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_DEFAULT-NEXT: s_clause 0x1
; GFX10_DEFAULT-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2
-; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], 0
+; GFX10_DEFAULT-NEXT: v_mov_b32_e32 v1, -1
+; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v0, v1, s[0:3], 0 offen
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0)
; GFX10_DEFAULT-NEXT: s_setpc_b64 s[30:31]
;
@@ -41,7 +41,7 @@ define <2 x half> @chain_hi_to_lo_private() {
; FLATSCR_GFX10-NEXT: s_mov_b32 s0, 2
; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, off, s0
; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT: s_mov_b32 s0, 0
+; FLATSCR_GFX10-NEXT: s_mov_b32 s0, -1
; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v0, off, s0
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -51,7 +51,7 @@ define <2 x half> @chain_hi_to_lo_private() {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, 2
; GFX11-NEXT: scratch_load_u16 v0, off, s0
-; GFX11-NEXT: s_mov_b32 s0, 0
+; GFX11-NEXT: s_mov_b32 s0, -1
; GFX11-NEXT: scratch_load_d16_hi_b16 v0, off, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -178,8 +178,9 @@ define <2 x half> @chain_hi_to_lo_group() {
; GCN-LABEL: chain_hi_to_lo_group:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: ds_read_u16 v0, v1 offset:2
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ds_read_u16 v0, v0 offset:2
+; GCN-NEXT: v_mov_b32_e32 v1, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_read_u16_d16_hi v0, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -188,8 +189,9 @@ define <2 x half> @chain_hi_to_lo_group() {
; GFX10-LABEL: chain_hi_to_lo_group:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: ds_read_u16 v0, v1 offset:2
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: v_mov_b32_e32 v1, -1
+; GFX10-NEXT: ds_read_u16 v0, v0 offset:2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ds_read_u16_d16_hi v0, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -198,8 +200,8 @@ define <2 x half> @chain_hi_to_lo_group() {
; GFX11-LABEL: chain_hi_to_lo_group:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-NEXT: ds_load_u16 v0, v1 offset:2
+; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, -1
+; GFX11-NEXT: ds_load_u16 v0, v0 offset:2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_u16_d16_hi v0, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 50c9c0cb64ccd60..d1cadc31314411f 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -35,7 +35,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: .LBB0_3: ; %bb.outer.end
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v0, 3
-; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v1, -1
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_endpgm
@@ -142,7 +142,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v0, -1
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
@@ -204,7 +204,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 3
-; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v1, -1
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_endpgm
@@ -332,7 +332,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_branch .LBB1_3
; GCN-O0-NEXT: .LBB1_5: ; %bb.outer.end
; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v0, -1
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
@@ -378,9 +378,10 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GCN-NEXT: s_cbranch_execz .LBB2_5
; GCN-NEXT: ; %bb.1: ; %bb.outer.then
-; GCN-NEXT: v_mov_b32_e32 v4, s1
-; GCN-NEXT: v_add_i32_e32 v3, vcc, s0, v1
-; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, s1
+; GCN-NEXT: v_add_i32_e32 v1, vcc, s0, v1
+; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GCN-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
@@ -391,8 +392,8 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: s_mov_b32 s4, s6
; GCN-NEXT: s_mov_b32 s5, s6
; GCN-NEXT: v_mov_b32_e32 v0, 2
-; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 offset:8
-; GCN-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 offset:8
+; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: .LBB2_3: ; %Flow
; GCN-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
; GCN-NEXT: s_cbranch_execz .LBB2_5
@@ -403,13 +404,14 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: s_mov_b32 s5, s6
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 1
-; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 offset:4
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 offset:4
; GCN-NEXT: .LBB2_5: ; %bb.outer.end
; GCN-NEXT: s_or_b64 exec, exec, s[2:3]
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 3
+; GCN-NEXT: v_mov_b32_e32 v1, -1
; GCN-NEXT: s_mov_b32 m0, -1
-; GCN-NEXT: ds_write_b32 v2, v0
+; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: nested_if_if_else:
@@ -560,7 +562,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v0, -1
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
@@ -649,7 +651,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 3
-; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v1, -1
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_endpgm
@@ -842,7 +844,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: v_readlane_b32 s1, v6, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v0, -1
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
index 9104dc68eb9b495..8dacf832d750232 100644
--- a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
+++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
@@ -31,7 +31,8 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1
; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; CHECK-NEXT: s_and_b64 s[6:7], exec, s[10:11]
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, -1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_branch .LBB0_3
; CHECK-NEXT: .LBB0_1: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[18:19], 0
@@ -98,8 +99,8 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1
; CHECK-NEXT: s_cbranch_vccnz .LBB0_15
; CHECK-NEXT: ; %bb.14: ; %bb15
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4
-; CHECK-NEXT: buffer_store_dword v0, off, s[24:27], 0
+; CHECK-NEXT: buffer_store_dword v1, v0, s[24:27], 0 offen
+; CHECK-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:3
; CHECK-NEXT: .LBB0_15: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[20:21], 0
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-recognizer-src-shared-base.ll b/llvm/test/CodeGen/AMDGPU/hazard-recognizer-src-shared-base.ll
index 4aa49f2c9296d73..5e95ac9b619fe92 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-recognizer-src-shared-base.ll
+++ b/llvm/test/CodeGen/AMDGPU/hazard-recognizer-src-shared-base.ll
@@ -4,11 +4,12 @@
define amdgpu_kernel void @foo() {
; CHECK-LABEL: foo:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_mov_b64 s[0:1], src_shared_base
-; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; CHECK-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s1
-; CHECK-NEXT: v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v3, v0
-; CHECK-NEXT: flat_store_b64 v[0:1], v[2:3]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; CHECK-NEXT: v_mov_b32_e32 v1, v0
+; CHECK-NEXT: flat_store_b64 v[2:3], v[0:1]
; CHECK-NEXT: s_endpgm
entry:
br label %bb1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
index 6cb2d6d55ea3209..1f274c9589313b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -451,7 +451,7 @@ define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: v_cndmask_b32_e64 v0, 0, v1, s[4:5]
+; GCN-NEXT: v_cndmask_b32_e64 v0, -1, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
@@ -472,7 +472,7 @@ define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: v_cndmask_b32_e64 v0, 0, v1, s[4:5]
+; GCN-NEXT: v_cndmask_b32_e64 v0, -1, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/load-hi16.ll b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
index 0c61c58ef061923..6cd704e4a08bc11 100644
--- a/llvm/test/CodeGen/AMDGPU/load-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
@@ -12,7 +12,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lo(ptr addrspace(3) noalias %
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: ds_read_u16_d16_hi v1, v0 offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, -1
; GFX900-NEXT: ds_write_b16 v0, v2
; GFX900-NEXT: s_waitcnt lgkmcnt(1)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
@@ -25,7 +25,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lo(ptr addrspace(3) noalias %
; GFX906-NEXT: ds_read_u16 v1, v0
; GFX906-NEXT: ds_read_u16 v0, v0 offset:16
; GFX906-NEXT: s_mov_b32 s4, 0x5040100
-; GFX906-NEXT: v_mov_b32_e32 v2, 0
+; GFX906-NEXT: v_mov_b32_e32 v2, -1
; GFX906-NEXT: s_waitcnt lgkmcnt(1)
; GFX906-NEXT: ds_write_b16 v2, v1
; GFX906-NEXT: s_waitcnt lgkmcnt(1)
@@ -39,7 +39,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lo(ptr addrspace(3) noalias %
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v1, v0
; GFX803-NEXT: ds_read_u16 v0, v0 offset:16
-; GFX803-NEXT: v_mov_b32_e32 v2, 0
+; GFX803-NEXT: v_mov_b32_e32 v2, -1
; GFX803-NEXT: s_waitcnt lgkmcnt(1)
; GFX803-NEXT: ds_write_b16 v2, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(1)
@@ -55,7 +55,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lo(ptr addrspace(3) noalias %
; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, v2
; GFX900-FLATSCR-NEXT: ds_read_u16_d16_hi v1, v0 offset:16
-; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v0, -1
; GFX900-FLATSCR-NEXT: ds_write_b16 v0, v2
; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(1)
; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v0, v1
@@ -78,7 +78,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_hi(ptr addrspace(3) noalias %
; GFX900-NEXT: ds_read_u16 v1, v0 offset:16
; GFX900-NEXT: ds_read_u16 v0, v0
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, -1
; GFX900-NEXT: s_waitcnt lgkmcnt(1)
; GFX900-NEXT: ds_write_b16 v2, v1
; GFX900-NEXT: s_waitcnt lgkmcnt(1)
@@ -92,7 +92,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_hi(ptr addrspace(3) noalias %
; GFX906-NEXT: ds_read_u16 v1, v0 offset:16
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_mov_b32 s4, 0x5040100
-; GFX906-NEXT: v_mov_b32_e32 v2, 0
+; GFX906-NEXT: v_mov_b32_e32 v2, -1
; GFX906-NEXT: s_waitcnt lgkmcnt(1)
; GFX906-NEXT: ds_write_b16 v2, v1
; GFX906-NEXT: s_waitcnt lgkmcnt(1)
@@ -106,7 +106,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_hi(ptr addrspace(3) noalias %
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v1, v0 offset:16
; GFX803-NEXT: ds_read_u16 v0, v0
-; GFX803-NEXT: v_mov_b32_e32 v2, 0
+; GFX803-NEXT: v_mov_b32_e32 v2, -1
; GFX803-NEXT: s_waitcnt lgkmcnt(1)
; GFX803-NEXT: ds_write_b16 v2, v1
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
@@ -121,7 +121,7 @@ define <2 x i16> @load_local_lo_hi_v2i16_multi_use_hi(ptr addrspace(3) noalias %
; GFX900-FLATSCR-NEXT: ds_read_u16 v1, v0 offset:16
; GFX900-FLATSCR-NEXT: ds_read_u16 v0, v0
; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
-; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v2, -1
; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(1)
; GFX900-FLATSCR-NEXT: ds_write_b16 v2, v1
; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(1)
diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
index 3ef86c13e150acc..680006523b9fa32 100644
--- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
@@ -593,7 +593,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-MUBUF-NEXT: ds_read_u16 v0, v0
-; GFX900-MUBUF-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-MUBUF-NEXT: v_mov_b32_e32 v2, -1
; GFX900-MUBUF-NEXT: s_mov_b32 s4, 0xffff
; GFX900-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-MUBUF-NEXT: ds_write_b16 v2, v0
@@ -606,7 +606,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
-; GFX906-NEXT: v_mov_b32_e32 v2, 0
+; GFX906-NEXT: v_mov_b32_e32 v2, -1
; GFX906-NEXT: s_mov_b32 s4, 0xffff
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: ds_write_b16 v2, v0
@@ -620,7 +620,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
-; GFX803-NEXT: v_mov_b32_e32 v2, 0
+; GFX803-NEXT: v_mov_b32_e32 v2, -1
; GFX803-NEXT: s_mov_b32 s4, 0x3020504
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: ds_write_b16 v2, v0
@@ -633,7 +633,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <
; GFX900-FLATSCR: ; %bb.0: ; %entry
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-FLATSCR-NEXT: ds_read_u16 v0, v0
-; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v2, -1
; GFX900-FLATSCR-NEXT: s_mov_b32 s0, 0xffff
; GFX900-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-FLATSCR-NEXT: ds_write_b16 v2, v0
@@ -656,7 +656,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(ptr addrspace(3) %in, <
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX900-NEXT: ds_read_u16_d16 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, -1
; GFX900-NEXT: ds_write_b16 v0, v2
; GFX900-NEXT: s_waitcnt lgkmcnt(1)
; GFX900-NEXT: global_store_dword v[0:1], v1, off
@@ -669,7 +669,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(ptr addrspace(3) %in, <
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_mov_b32 s4, 0xffff
; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX906-NEXT: v_mov_b32_e32 v3, 0
+; GFX906-NEXT: v_mov_b32_e32 v3, -1
; GFX906-NEXT: ds_write_b16 v3, v2
; GFX906-NEXT: s_waitcnt lgkmcnt(1)
; GFX906-NEXT: v_bfi_b32 v0, s4, v0, v1
@@ -684,7 +684,7 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(ptr addrspace(3) %in, <
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: s_mov_b32 s4, 0x3020504
; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX803-NEXT: v_mov_b32_e32 v3, 0
+; GFX803-NEXT: v_mov_b32_e32 v3, -1
; GFX803-NEXT: ds_write_b16 v3, v2
; GFX803-NEXT: s_waitcnt lgkmcnt(1)
; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
index b8e74bc7db09a1a..9fda59ceb267569 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
@@ -12,7 +12,7 @@ define void @machinesink_loop_variable_out_of_divergent_loop(i32 %arg, i1 %cmp49
; CHECK-NEXT: v_and_b32_e32 v3, 1, v3
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, -1
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
; CHECK-NEXT: s_xor_b32 s6, s4, -1
; CHECK-NEXT: s_inst_prefetch 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll b/llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll
new file mode 100644
index 000000000000000..691cfd62708690e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+define i32 @nullptr_p0(ptr %p) {
+; CHECK-LABEL: nullptr_p0:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p1(ptr addrspace(1) %p) {
+; CHECK-LABEL: nullptr_p1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(1) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p3(ptr addrspace(3) %p) {
+; CHECK-LABEL: nullptr_p3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(3) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p4(ptr addrspace(4) %p) {
+; CHECK-LABEL: nullptr_p4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(4) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p5(ptr addrspace(5) %p) {
+; CHECK-LABEL: nullptr_p5:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(5) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index a1197aeace86f04..01c2e179d0c41fb 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -2560,12 +2560,8 @@ entry:
define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
; GFX8-LABEL: negativeoffsetnullptr:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_load_dword s1, s[4:5], 0xec
-; GFX8-NEXT: s_add_u32 s0, 0, -1
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_addc_u32 s1, s1, -1
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: v_mov_b32_e32 v0, -1
+; GFX8-NEXT: v_mov_b32_e32 v1, -1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_mov_b64 s[0:1], 0
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2579,32 +2575,27 @@ define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
; GFX8-NEXT: ; %bb.2: ; %end
; GFX8-NEXT: s_endpgm
;
-; GFX9-LABEL: negativeoffsetnullptr:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, -1, 0
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
-; GFX9-NEXT: .LBB8_1: ; %branch
-; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_and_b64 s[2:3], exec, vcc
-; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; GFX9-NEXT: s_cbranch_execnz .LBB8_1
-; GFX9-NEXT: ; %bb.2: ; %end
-; GFX9-NEXT: s_endpgm
+; GFX900-LABEL: negativeoffsetnullptr:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: v_mov_b32_e32 v0, -1
+; GFX900-NEXT: v_mov_b32_e32 v1, -1
+; GFX900-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX900-NEXT: s_mov_b64 s[0:1], 0
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX900-NEXT: .LBB8_1: ; %branch
+; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX900-NEXT: s_and_b64 s[2:3], exec, vcc
+; GFX900-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX900-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX900-NEXT: s_cbranch_execnz .LBB8_1
+; GFX900-NEXT: ; %bb.2: ; %end
+; GFX900-NEXT: s_endpgm
;
; GFX10-LABEL: negativeoffsetnullptr:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
-; GFX10-NEXT: s_add_u32 s0, 0, -1
-; GFX10-NEXT: s_addc_u32 s1, s1, -1
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: v_mov_b32_e32 v0, -1
+; GFX10-NEXT: v_mov_b32_e32 v1, -1
; GFX10-NEXT: s_mov_b32 s0, 0
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2618,12 +2609,26 @@ define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
; GFX10-NEXT: ; %bb.2: ; %end
; GFX10-NEXT: s_endpgm
;
+; GFX90A-LABEL: negativeoffsetnullptr:
+; GFX90A: ; %bb.0: ; %entry
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], -1, -1
+; GFX90A-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX90A-NEXT: s_mov_b64 s[0:1], 0
+; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX90A-NEXT: .LBB8_1: ; %branch
+; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT: s_and_b64 s[2:3], exec, vcc
+; GFX90A-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX90A-NEXT: s_cbranch_execnz .LBB8_1
+; GFX90A-NEXT: ; %bb.2: ; %end
+; GFX90A-NEXT: s_endpgm
+;
; GFX11-LABEL: negativeoffsetnullptr:
; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
-; GFX11-NEXT: v_add_co_u32 v0, s0, -1, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-NEXT: v_mov_b32_e32 v0, -1
+; GFX11-NEXT: v_mov_b32_e32 v1, -1
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: flat_load_u8 v0, v[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll
index 6eae905278f3ede..a212b00263d987e 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll
@@ -30,7 +30,8 @@ define amdgpu_kernel void @widget(ptr addrspace(1) %arg, i1 %arg1, ptr addrspace
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %bb19
; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: ds_write_b32 v1, v1
+; CHECK-NEXT: v_mov_b32_e32 v2, -1
+; CHECK-NEXT: ds_write_b32 v2, v1
; CHECK-NEXT: .LBB0_2: ; %bb20
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CHECK-NEXT: s_mov_b32 s0, exec_lo
diff --git a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll
index ace4907670d3796..4bf798e661661f8 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll
@@ -11,7 +11,7 @@ define i32 @f() {
; CHECK-LABEL: f:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, -1
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
index 8f16fcf6d08906a..4ad72a3cf7c1c91 100644
--- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
+++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
@@ -390,19 +390,19 @@ define void @func_stackrestore_null() {
; WAVE32-OPT-LABEL: func_stackrestore_null:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-OPT-NEXT: s_mov_b32 s32, 0
+; WAVE32-OPT-NEXT: s_movk_i32 s32, 0xffe0
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stackrestore_null:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-OPT-NEXT: s_mov_b32 s32, 0
+; WAVE64-OPT-NEXT: s_movk_i32 s32, 0xffc0
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stackrestore_null:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-O0-NEXT: s_mov_b32 s4, 0
+; WAVE32-O0-NEXT: s_mov_b32 s4, -1
; WAVE32-O0-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
@@ -410,7 +410,7 @@ define void @func_stackrestore_null() {
; WAVE64-O0-LABEL: func_stackrestore_null:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE64-O0-NEXT: s_mov_b32 s4, 0
+; WAVE64-O0-NEXT: s_mov_b32 s4, -1
; WAVE64-O0-NEXT: s_lshl_b32 s4, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
@@ -418,7 +418,7 @@ define void @func_stackrestore_null() {
; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_null:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, 0
+; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.convergencetokens.ll b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.convergencetokens.ll
index c0587d260c6f23d..fb3cbc218edcae8 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.convergencetokens.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.convergencetokens.ll
@@ -22,7 +22,7 @@ define void @tail_call_i64_inreg_uniform_in_vgpr_convergence_tokens() #0 {
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[CONVERGENCECTRL_ENTRY:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ENTRY
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; CHECK-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[DS_READ_B64_gfx9_]].sub1
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[DS_READ_B64_gfx9_]].sub0
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.ll b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.ll
index 80dae9142870a2f..c362c66805f508e 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.ll
@@ -56,7 +56,7 @@ define void @tail_call_i64_inreg_uniform_in_vgpr() {
; CHECK-LABEL: tail_call_i64_inreg_uniform_in_vgpr:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, -1
; CHECK-NEXT: ds_read_b64 v[0:1], v0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, void_func_i64_inreg@gotpcrel32@lo+4
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.convergencetokens.ll b/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.convergencetokens.ll
index ac449f972acb504..fd2d831f7c7f6fb 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.convergencetokens.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.convergencetokens.ll
@@ -23,7 +23,7 @@ define void @tail_call_uniform_vgpr_value_convergence_tokens() #0 {
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[CONVERGENCECTRL_ENTRY:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ENTRY
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; CHECK-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[DS_READ_B64_gfx9_]].sub1
; CHECK-NEXT: CONVERGENCECTRL_GLUE [[CONVERGENCECTRL_ENTRY]]
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.ll b/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.ll
index b5a68720dc19f5b..799e58f874445a1 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-uniform-target-in-vgprs-issue110930.ll
@@ -9,7 +9,7 @@ define void @tail_call_uniform_vgpr_value() {
; CHECK-LABEL: tail_call_uniform_vgpr_value:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, -1
; CHECK-NEXT: ds_read_b64 v[0:1], v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_readfirstlane_b32 s17, v1
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index dd78c2f46dde8a2..14fe9fda0cdaf9f 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -78,6 +78,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b32 s71, s15
; GLOBALNESS1-NEXT: s_mov_b32 s72, s14
; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11]
+; GLOBALNESS1-NEXT: v_mov_b32_e32 v40, -1
; GLOBALNESS1-NEXT: s_mov_b32 s32, 0
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr44_vgpr45
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0)
@@ -113,10 +114,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0
-; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1]
-; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS1-NEXT: flat_load_dword v46, v[0:1]
+; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS1-NEXT: buffer_store_dword v42, v40, s[0:3], 0 offen
+; GLOBALNESS1-NEXT: flat_load_dword v47, v[0:1]
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
@@ -180,8 +181,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS1-NEXT: .LBB1_13: ; %bb44.lr.ph.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46
-; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v47
+; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v46, vcc
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -370,6 +371,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b32 s69, s15
; GLOBALNESS0-NEXT: s_mov_b32 s70, s14
; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11]
+; GLOBALNESS0-NEXT: v_mov_b32_e32 v40, -1
; GLOBALNESS0-NEXT: s_mov_b32 s32, 0
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr44_vgpr45
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0)
@@ -405,10 +407,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0
-; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1]
-; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1]
+; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS0-NEXT: buffer_store_dword v42, v40, s[0:3], 0 offen
+; GLOBALNESS0-NEXT: flat_load_dword v47, v[0:1]
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
@@ -472,8 +474,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS0-NEXT: .LBB1_13: ; %bb44.lr.ph.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46
-; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v47
+; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v46, vcc
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index d9df80ce6c1c04c..54756a1581f16e3 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -257,41 +257,42 @@ define hidden void @blam() {
; GCN-NEXT: s_mov_b32 s16, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[18:19]
-; GCN-NEXT: v_writelane_b32 v45, s16, 26
+; GCN-NEXT: v_writelane_b32 v46, s16, 26
; GCN-NEXT: s_addk_i32 s32, 0x800
-; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT: v_writelane_b32 v45, s30, 0
-; GCN-NEXT: v_writelane_b32 v45, s31, 1
-; GCN-NEXT: v_writelane_b32 v45, s34, 2
-; GCN-NEXT: v_writelane_b32 v45, s35, 3
-; GCN-NEXT: v_writelane_b32 v45, s36, 4
-; GCN-NEXT: v_writelane_b32 v45, s37, 5
-; GCN-NEXT: v_writelane_b32 v45, s38, 6
-; GCN-NEXT: v_writelane_b32 v45, s39, 7
-; GCN-NEXT: v_writelane_b32 v45, s40, 8
-; GCN-NEXT: v_writelane_b32 v45, s41, 9
-; GCN-NEXT: v_writelane_b32 v45, s42, 10
-; GCN-NEXT: v_writelane_b32 v45, s43, 11
-; GCN-NEXT: v_writelane_b32 v45, s44, 12
-; GCN-NEXT: v_writelane_b32 v45, s45, 13
-; GCN-NEXT: v_writelane_b32 v45, s46, 14
-; GCN-NEXT: v_writelane_b32 v45, s47, 15
-; GCN-NEXT: v_writelane_b32 v45, s48, 16
-; GCN-NEXT: v_writelane_b32 v45, s49, 17
-; GCN-NEXT: v_writelane_b32 v45, s50, 18
-; GCN-NEXT: v_writelane_b32 v45, s51, 19
-; GCN-NEXT: v_writelane_b32 v45, s52, 20
-; GCN-NEXT: v_writelane_b32 v45, s53, 21
-; GCN-NEXT: v_writelane_b32 v45, s54, 22
-; GCN-NEXT: v_writelane_b32 v45, s55, 23
-; GCN-NEXT: v_writelane_b32 v45, s56, 24
-; GCN-NEXT: v_writelane_b32 v45, s57, 25
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: v_writelane_b32 v46, s30, 0
+; GCN-NEXT: v_writelane_b32 v46, s31, 1
+; GCN-NEXT: v_writelane_b32 v46, s34, 2
+; GCN-NEXT: v_writelane_b32 v46, s35, 3
+; GCN-NEXT: v_writelane_b32 v46, s36, 4
+; GCN-NEXT: v_writelane_b32 v46, s37, 5
+; GCN-NEXT: v_writelane_b32 v46, s38, 6
+; GCN-NEXT: v_writelane_b32 v46, s39, 7
+; GCN-NEXT: v_writelane_b32 v46, s40, 8
+; GCN-NEXT: v_writelane_b32 v46, s41, 9
+; GCN-NEXT: v_writelane_b32 v46, s42, 10
+; GCN-NEXT: v_writelane_b32 v46, s43, 11
+; GCN-NEXT: v_writelane_b32 v46, s44, 12
+; GCN-NEXT: v_writelane_b32 v46, s45, 13
+; GCN-NEXT: v_writelane_b32 v46, s46, 14
+; GCN-NEXT: v_writelane_b32 v46, s47, 15
+; GCN-NEXT: v_writelane_b32 v46, s48, 16
+; GCN-NEXT: v_writelane_b32 v46, s49, 17
+; GCN-NEXT: v_writelane_b32 v46, s50, 18
+; GCN-NEXT: v_writelane_b32 v46, s51, 19
+; GCN-NEXT: v_writelane_b32 v46, s52, 20
+; GCN-NEXT: v_writelane_b32 v46, s53, 21
+; GCN-NEXT: v_writelane_b32 v46, s54, 22
+; GCN-NEXT: v_writelane_b32 v46, s55, 23
+; GCN-NEXT: v_writelane_b32 v46, s56, 24
+; GCN-NEXT: v_writelane_b32 v46, s57, 25
; GCN-NEXT: v_mov_b32_e32 v40, v31
; GCN-NEXT: s_mov_b32 s46, s15
; GCN-NEXT: s_mov_b32 s47, s14
@@ -304,14 +305,15 @@ define hidden void @blam() {
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v40
-; GCN-NEXT: flat_load_dword v43, v[0:1]
; GCN-NEXT: v_mov_b32_e32 v42, 0
+; GCN-NEXT: flat_load_dword v43, v[0:1]
; GCN-NEXT: s_mov_b64 s[50:51], 0
+; GCN-NEXT: v_mov_b32_e32 v44, -1
; GCN-NEXT: v_lshlrev_b32_e32 v41, 2, v2
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_f32_e64 s[52:53], 0, v43
; GCN-NEXT: v_cmp_neq_f32_e64 s[42:43], 0, v43
-; GCN-NEXT: v_mov_b32_e32 v44, 0x7fc00000
+; GCN-NEXT: v_mov_b32_e32 v45, 0x7fc00000
; GCN-NEXT: s_branch .LBB1_2
; GCN-NEXT: .LBB1_1: ; %Flow7
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@@ -323,7 +325,7 @@ define hidden void @blam() {
; GCN-NEXT: .LBB1_2: ; %bb2
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: flat_load_dword v0, v[41:42]
-; GCN-NEXT: buffer_store_dword v42, off, s[0:3], 0
+; GCN-NEXT: buffer_store_dword v42, v44, s[0:3], 0 offen
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 2, v0
@@ -362,7 +364,7 @@ define hidden void @blam() {
; GCN-NEXT: s_cbranch_execz .LBB1_7
; GCN-NEXT: ; %bb.6: ; %bb16
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0
+; GCN-NEXT: buffer_store_dword v45, v44, s[0:3], 0 offen
; GCN-NEXT: s_or_b64 s[8:9], s[52:53], exec
; GCN-NEXT: .LBB1_7: ; %Flow3
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@@ -374,7 +376,7 @@ define hidden void @blam() {
; GCN-NEXT: ; %bb.8: ; %bb17
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_mov_b64 s[6:7], exec
-; GCN-NEXT: buffer_store_dword v43, off, s[0:3], 0
+; GCN-NEXT: buffer_store_dword v43, v44, s[0:3], 0 offen
; GCN-NEXT: .LBB1_9: ; %Flow4
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
@@ -404,7 +406,7 @@ define hidden void @blam() {
; GCN-NEXT: s_cbranch_execz .LBB1_15
; GCN-NEXT: ; %bb.14: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0
+; GCN-NEXT: buffer_store_dword v45, v44, s[0:3], 0 offen
; GCN-NEXT: s_or_b64 s[10:11], s[6:7], exec
; GCN-NEXT: .LBB1_15: ; %Flow6
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@@ -422,46 +424,47 @@ define hidden void @blam() {
; GCN-NEXT: s_cbranch_execz .LBB1_1
; GCN-NEXT: ; %bb.17: ; %bb18
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0
+; GCN-NEXT: buffer_store_dword v45, v44, s[0:3], 0 offen
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_branch .LBB1_1
; GCN-NEXT: .LBB1_18: ; %DummyReturnBlock
; GCN-NEXT: s_or_b64 exec, exec, s[50:51]
-; GCN-NEXT: v_readlane_b32 s57, v45, 25
-; GCN-NEXT: v_readlane_b32 s56, v45, 24
-; GCN-NEXT: v_readlane_b32 s55, v45, 23
-; GCN-NEXT: v_readlane_b32 s54, v45, 22
-; GCN-NEXT: v_readlane_b32 s53, v45, 21
-; GCN-NEXT: v_readlane_b32 s52, v45, 20
-; GCN-NEXT: v_readlane_b32 s51, v45, 19
-; GCN-NEXT: v_readlane_b32 s50, v45, 18
-; GCN-NEXT: v_readlane_b32 s49, v45, 17
-; GCN-NEXT: v_readlane_b32 s48, v45, 16
-; GCN-NEXT: v_readlane_b32 s47, v45, 15
-; GCN-NEXT: v_readlane_b32 s46, v45, 14
-; GCN-NEXT: v_readlane_b32 s45, v45, 13
-; GCN-NEXT: v_readlane_b32 s44, v45, 12
-; GCN-NEXT: v_readlane_b32 s43, v45, 11
-; GCN-NEXT: v_readlane_b32 s42, v45, 10
-; GCN-NEXT: v_readlane_b32 s41, v45, 9
-; GCN-NEXT: v_readlane_b32 s40, v45, 8
-; GCN-NEXT: v_readlane_b32 s39, v45, 7
-; GCN-NEXT: v_readlane_b32 s38, v45, 6
-; GCN-NEXT: v_readlane_b32 s37, v45, 5
-; GCN-NEXT: v_readlane_b32 s36, v45, 4
-; GCN-NEXT: v_readlane_b32 s35, v45, 3
-; GCN-NEXT: v_readlane_b32 s34, v45, 2
-; GCN-NEXT: v_readlane_b32 s31, v45, 1
-; GCN-NEXT: v_readlane_b32 s30, v45, 0
-; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT: v_readlane_b32 s57, v46, 25
+; GCN-NEXT: v_readlane_b32 s56, v46, 24
+; GCN-NEXT: v_readlane_b32 s55, v46, 23
+; GCN-NEXT: v_readlane_b32 s54, v46, 22
+; GCN-NEXT: v_readlane_b32 s53, v46, 21
+; GCN-NEXT: v_readlane_b32 s52, v46, 20
+; GCN-NEXT: v_readlane_b32 s51, v46, 19
+; GCN-NEXT: v_readlane_b32 s50, v46, 18
+; GCN-NEXT: v_readlane_b32 s49, v46, 17
+; GCN-NEXT: v_readlane_b32 s48, v46, 16
+; GCN-NEXT: v_readlane_b32 s47, v46, 15
+; GCN-NEXT: v_readlane_b32 s46, v46, 14
+; GCN-NEXT: v_readlane_b32 s45, v46, 13
+; GCN-NEXT: v_readlane_b32 s44, v46, 12
+; GCN-NEXT: v_readlane_b32 s43, v46, 11
+; GCN-NEXT: v_readlane_b32 s42, v46, 10
+; GCN-NEXT: v_readlane_b32 s41, v46, 9
+; GCN-NEXT: v_readlane_b32 s40, v46, 8
+; GCN-NEXT: v_readlane_b32 s39, v46, 7
+; GCN-NEXT: v_readlane_b32 s38, v46, 6
+; GCN-NEXT: v_readlane_b32 s37, v46, 5
+; GCN-NEXT: v_readlane_b32 s36, v46, 4
+; GCN-NEXT: v_readlane_b32 s35, v46, 3
+; GCN-NEXT: v_readlane_b32 s34, v46, 2
+; GCN-NEXT: v_readlane_b32 s31, v46, 1
+; GCN-NEXT: v_readlane_b32 s30, v46, 0
+; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b32 s32, s33
-; GCN-NEXT: v_readlane_b32 s4, v45, 26
+; GCN-NEXT: v_readlane_b32 s4, v46, 26
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_mov_b32 s33, s4
; GCN-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll b/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
index 5b40d53e0a81c4d..79798d5c83b3dc0 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
@@ -5,7 +5,7 @@ define amdgpu_kernel void @icmp_test() {
; CHECK-LABEL: icmp_test:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: v_cmp_eq_u16_e64 s[0:1], 0, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, -1
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
@@ -24,7 +24,7 @@ define amdgpu_kernel void @fcmp_test(half %x, half %y) {
; CHECK-LABEL: fcmp_test:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_load_b32 s0, s[4:5], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, -1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshr_b32 s1, s0, 16
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -47,7 +47,7 @@ define amdgpu_kernel void @ballot_test(half %x, half %y) {
; CHECK-LABEL: ballot_test:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_b32 s0, s[4:5], 0x0
-; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, -1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_lshr_b32 s1, s0, 16
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
diff --git a/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll b/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll
index 6133cb469072378..8f072aff56f52e4 100644
--- a/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll
+++ b/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll
@@ -24,10 +24,11 @@ define amdgpu_kernel void @foo(i1 %cmp1) {
; GFX906-NEXT: s_mov_b32 s15, 0xe00000
; GFX906-NEXT: s_add_u32 s12, s12, s11
; GFX906-NEXT: s_addc_u32 s13, s13, 0
-; GFX906-NEXT: buffer_load_dword v3, off, s[12:15], 0
-; GFX906-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:4
-; GFX906-NEXT: buffer_load_dword v5, off, s[12:15], 0 offset:8
-; GFX906-NEXT: buffer_load_dword v6, off, s[12:15], 0 offset:12
+; GFX906-NEXT: v_mov_b32_e32 v7, -1
+; GFX906-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:3
+; GFX906-NEXT: buffer_load_dword v5, off, s[12:15], 0 offset:7
+; GFX906-NEXT: buffer_load_dword v3, v7, s[12:15], 0 offen
+; GFX906-NEXT: buffer_load_dword v6, off, s[12:15], 0 offset:11
; GFX906-NEXT: s_load_dword s2, s[4:5], 0x24
; GFX906-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1c
; GFX906-NEXT: s_mov_b32 s4, 0
@@ -43,12 +44,10 @@ define amdgpu_kernel void @foo(i1 %cmp1) {
; GFX906-NEXT: s_mov_b64 s[2:3], exec
; GFX906-NEXT: ds_write_b64 v2, v[0:1]
; GFX906-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
-; GFX906-NEXT: s_waitcnt vmcnt(3)
+; GFX906-NEXT: s_waitcnt vmcnt(1)
; GFX906-NEXT: v_readfirstlane_b32 s0, v3
-; GFX906-NEXT: s_waitcnt vmcnt(2)
; GFX906-NEXT: v_readfirstlane_b32 s1, v4
; GFX906-NEXT: v_cmp_eq_u64_e32 vcc, s[0:1], v[3:4]
-; GFX906-NEXT: s_waitcnt vmcnt(1)
; GFX906-NEXT: v_readfirstlane_b32 s0, v5
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_readfirstlane_b32 s1, v6
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index deab40758188057..3a42593a65d960c 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -3409,6 +3409,7 @@ define amdgpu_gs void @wqm_init_exec() {
; GFX9-W64-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX9-W64-NEXT: s_wqm_b64 exec, exec
; GFX9-W64-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $exec
+; GFX9-W64-NEXT: v_mov_b32_e32 v0, -1
; GFX9-W64-NEXT: v_mov_b32_e32 v1, s0
; GFX9-W64-NEXT: ds_write_b32 v0, v1
; GFX9-W64-NEXT: s_endpgm
@@ -3416,20 +3417,21 @@ define amdgpu_gs void @wqm_init_exec() {
; GFX10-W32-LABEL: wqm_init_exec:
; GFX10-W32: ; %bb.0: ; %bb
; GFX10-W32-NEXT: s_mov_b32 exec_lo, -1
-; GFX10-W32-NEXT: s_mov_b32 s1, exec_lo
-; GFX10-W32-NEXT: v_mov_b32_e32 v0, 0
; GFX10-W32-NEXT: s_mov_b32 s0, 0
+; GFX10-W32-NEXT: s_mov_b32 s2, exec_lo
+; GFX10-W32-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-W32-NEXT: s_mov_b32 s1, s0
; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX10-W32-NEXT: s_mov_b32 s2, s0
-; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s1
+; GFX10-W32-NEXT: s_mov_b32 s3, s0
+; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2
; GFX10-W32-NEXT: v_mov_b32_e32 v1, v0
; GFX10-W32-NEXT: v_mov_b32_e32 v2, v0
; GFX10-W32-NEXT: v_mov_b32_e32 v3, v0
-; GFX10-W32-NEXT: v_mov_b32_e32 v4, s0
-; GFX10-W32-NEXT: s_mov_b32 s1, s0
-; GFX10-W32-NEXT: s_mov_b32 s3, s0
+; GFX10-W32-NEXT: v_mov_b32_e32 v4, -1
+; GFX10-W32-NEXT: v_mov_b32_e32 v5, s0
+; GFX10-W32-NEXT: s_mov_b32 s2, s0
; GFX10-W32-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
-; GFX10-W32-NEXT: ds_write_b32 v0, v4
+; GFX10-W32-NEXT: ds_write_b32 v4, v5
; GFX10-W32-NEXT: s_endpgm
bb:
call void @llvm.amdgcn.init.exec(i64 -1)
More information about the llvm-commits
mailing list