[llvm] [AMDGPU][MC] Disallow null as saddr in flat instructions (PR #101730)
Jun Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 12:32:51 PDT 2024
https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/101730
>From d25516b073e5db5da7e857cdeadbbdd43870f596 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Fri, 2 Aug 2024 11:13:32 -0700
Subject: [PATCH 1/7] [AMDGPU][MC] Disallow null as saddr in flat instructions
Some flat instructions have an saddr operand. When 'null' is provided
as saddr, it may have the same encoding as another instruction. For
example, the instructions 'global_atomic_add v1, v2, null' and
'global_atomic_add v[1:2], v2, off' have the same encoding. This patch
disallows having null as saddr.
---
.../Disassembler/AMDGPUDisassembler.cpp | 1 +
llvm/lib/Target/AMDGPU/FLATInstructions.td | 8 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 8 +
llvm/test/MC/AMDGPU/flat-global.s | 4 +-
.../MC/AMDGPU/gfx10_flat_instructions_err.s | 268 ++++++++++++++++
.../MC/AMDGPU/gfx11_flat_instructions_err.s | 253 +++++++++++++++
.../MC/AMDGPU/gfx12_flat_instructions_err.s | 289 ++++++++++++++++++
7 files changed, 825 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
create mode 100644 llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
create mode 100644 llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 2f5eba47afc27f..ca4be01736c1f2 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -291,6 +291,7 @@ DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_64_XEXEC_XNULL, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7b3822067072e5..6b5e47902c5a53 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -209,7 +209,7 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
!con(
!con(
!if(EnableSaddr,
- (ins SReg_64:$saddr, VGPR_32:$vaddr),
+ (ins SReg_64_XEXEC_XNULL:$saddr, VGPR_32:$vaddr),
(ins VReg_64:$vaddr)),
(ins flat_offset:$offset)),
// FIXME: Operands with default values do not work with following non-optional operands.
@@ -231,7 +231,7 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
(outs),
!con(
!if(EnableSaddr,
- (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
+ (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64_XEXEC_XNULL:$saddr),
(ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
(ins flat_offset:$offset, CPol_0:$cpol)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
@@ -589,7 +589,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
+ (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName> {
let has_saddr = 1;
@@ -620,7 +620,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
- (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
+ (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName#"_rtn"> {
let has_saddr = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 443797ef0ff4bf..694b3ec92fb781 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -849,6 +849,14 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
let HasSGPR = 1;
}
+def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
+ SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
+ let CopyCost = 1;
+ let AllocationPriority = 1;
+ let HasSGPR = 1;
+}
+
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s
index e81fae86b05583..b35ad87edeea35 100644
--- a/llvm/test/MC/AMDGPU/flat-global.s
+++ b/llvm/test/MC/AMDGPU/flat-global.s
@@ -212,8 +212,8 @@ global_store_dword v3, v1, s[2:3] offset:-8
// XXX: Is this valid?
global_store_dword v3, v1, exec
-// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
-// GFX9: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
+// GFX10-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX9-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
// VI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: instruction not supported on this GPU
global_load_dword v1, v[3:4], s2
diff --git a/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
new file mode 100644
index 00000000000000..193e91e1d0bb13
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
@@ -0,0 +1,268 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX1010,GFX10 --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefixes=GFX1030,GFX10 --implicit-check-not=error: %s
+
+global_atomic_add v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_add v0, v2, v4, null glc
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_add_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_add_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_and v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_and v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_and_x2 v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_cmpswap v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_cmpswap v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_x2 v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+global_atomic_cmpswap_x2 v[0:1], v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:46: error: invalid operand for instruction
+
+global_atomic_csub v2, v4, null
+// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1030: :[[@LINE-2]]:28: error: invalid operand for instruction
+
+global_atomic_csub v0, v2, v4, null
+// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1030: :[[@LINE-2]]:32: error: invalid operand for instruction
+
+global_atomic_dec v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_dec v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_dec_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_fcmpswap v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_fcmpswap v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+global_atomic_fcmpswap_x2 v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_fcmpswap_x2 v[0:1], v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_fmax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_fmax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_fmax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_fmax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_fmin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_fmin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_fmin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_fmin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_inc v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_inc_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_or v2, v4, null
+// GFX10: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_atomic_or v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_atomic_or_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:41: error: invalid operand for instruction
+
+global_atomic_smax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_smax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_smax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_smax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_smin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_smin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_smin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_smin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_sub v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_sub v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_sub_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_sub_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_swap v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_swap v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_swap_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_swap_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_umax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_umax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_umax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_umax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_umin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_umin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_umin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_umin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_xor v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_xor v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_xor_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_xor_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_load_dword v0, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_dwordx2 v[0:1], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_dwordx3 v[0:2], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_dwordx4 v[0:3], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_sbyte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_sbyte_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_sbyte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_short_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_short_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_sshort v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_ubyte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_ubyte_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_ubyte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_ushort v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_byte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_store_byte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dword v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_dwordx2 v0, v[2:3], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dwordx3 v0, v[2:4], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dwordx4 v0, v[2:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_short v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_short_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
new file mode 100644
index 00000000000000..9e51b0b8cc8cda
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
@@ -0,0 +1,253 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefixes=GFX11 --implicit-check-not=error: %s
+
+global_atomic_add_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_f32 v0, v2, v4, null glc
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, v4, null glc
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v0, v[2:5], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v[0:1], v2, v[4:7], null
+// GFX11: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_cmpswap_f32 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_f32 v0, v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_csub_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_csub_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_f32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_f32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:44: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_load_b128 v[0:3], v4, null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_load_b32 v0, v4, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_b64 v[0:1], v4, null
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_b96 v[0:2], v4, null
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_d16_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_d16_hi_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_load_d16_hi_i8 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_hi_u8 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_i8 v0, v2, null
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_d16_u8 v0, v2, null
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_i16 v0, v2, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_i8 v0, v2, null
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_load_u16 v0, v2, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_u8 v0, v2, null
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_store_b128 v0, v[2:5], null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_store_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_b8 v0, v2, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_store_b96 v0, v[2:4], null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_d16_hi_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_store_d16_hi_b8 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s
new file mode 100644
index 00000000000000..5e31002d0d92ba
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s
@@ -0,0 +1,289 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=GFX12 --implicit-check-not=error: %s
+
+global_atomic_add_f32 v0, v2, null
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_f32 v0, v2, v4, null glc
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, v4, null glc
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v0, v[2:5], null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v[0:1], v2, v[4:7], null
+// GFX12: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_cond_sub_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_cond_sub_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_num_f32 v0, v2, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_num_f32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_num_f32 v0, v2, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_num_f32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_ordered_add_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_ordered_add_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:51: error: invalid operand for instruction
+
+global_atomic_pk_add_bf16 v0, v2, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_pk_add_bf16 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_pk_add_f16 v0, v2, null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_pk_add_f16 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+global_atomic_sub_clamp_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+global_atomic_sub_clamp_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:41: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:44: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_load_b128 v[0:3], v4, null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_load_b32 v0, v4, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_b64 v[0:1], v4, null
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_b96 v[0:2], v4, null
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_block v[0:31], v32, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_d16_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_d16_hi_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_load_d16_hi_i8 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_hi_u8 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_i8 v0, v2, null
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_d16_u8 v0, v2, null
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_i16 v0, v2, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_i8 v0, v2, null
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_load_tr_b128 v[0:3], v4, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_tr_b128 v[0:1], v4, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_tr_b64 v[0:1], v4, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_load_tr_b64 v0, v4, null
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_u16 v0, v2, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_u8 v0, v2, null
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_store_b128 v0, v[2:5], null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_store_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_b8 v0, v2, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_store_b96 v0, v[2:4], null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_block v32, v[0:31], null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_d16_hi_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_store_d16_hi_b8 v0, v2, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
>From 84d6f0ec66eddea841615c61e59dbe512a46ca49 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Tue, 13 Aug 2024 17:33:11 -0700
Subject: [PATCH 2/7] This commit: (1) define SReg_64_XEXEC in terms of
SReg_64_XEXEC_XNULL (2) fix tests affected by the new reg class. Todo: in
final commit msg, mention EXEC as well.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 3 +-
.../GlobalISel/global-atomic-fadd.f32-rtn.ll | 6 +-
.../GlobalISel/global-atomic-fadd.f64.ll | 4 +-
.../global-atomic-fadd.v2f16-no-rtn.ll | 4 +-
.../global-atomic-fadd.v2f16-rtn.ll | 2 +-
...st-select-amdgpu-atomic-cmpxchg-global.mir | 8 +-
.../AMDGPU/GlobalISel/inst-select-copy.mir | 2 +-
.../GlobalISel/inst-select-fract.f64.mir | 12 ++-
.../inst-select-load-global-saddr.mir | 80 +++++++++----------
.../AMDGPU/GlobalISel/vni8-across-blocks.ll | 22 ++---
.../test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir | 12 +--
.../CodeGen/AMDGPU/expand-si-indirect.mir | 2 +-
.../AMDGPU/global-atomic-fadd.f32-no-rtn.ll | 6 +-
.../AMDGPU/global-atomic-fadd.f32-rtn.ll | 6 +-
llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll | 24 +++---
.../AMDGPU/llvm.amdgcn.wave.reduce.umax.mir | 8 +-
.../AMDGPU/llvm.amdgcn.wave.reduce.umin.mir | 8 +-
.../merge-flat-with-global-load-store.mir | 16 ++--
.../AMDGPU/merge-global-load-store.mir | 52 ++++++------
.../CodeGen/AMDGPU/move-load-addr-to-valu.mir | 22 ++---
.../CodeGen/AMDGPU/move-to-valu-addsubu64.ll | 10 +--
.../move-to-valu-pseudo-scalar-trans.ll | 20 ++---
...ask-pre-ra-non-empty-but-used-interval.mir | 4 +-
...al-regcopy-and-spill-missed-at-regalloc.ll | 8 +-
.../ran-out-of-sgprs-allocation-failure.mir | 4 +-
.../AMDGPU/sched-barrier-hang-weak-dep.mir | 4 +-
.../CodeGen/AMDGPU/sched-barrier-pre-RA.mir | 48 +++++------
.../sched-group-barrier-pipeline-solver.mir | 24 +++---
.../AMDGPU/sched-group-barrier-pre-RA.mir | 24 +++---
llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll | 2 +-
llvm/test/MC/AMDGPU/flat-global.s | 1 -
.../MC/Disassembler/AMDGPU/gfx10_flat.txt | 3 -
33 files changed, 227 insertions(+), 226 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9344d0c54ac1fc..de2401afdfbde5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6245,6 +6245,8 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
return;
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
+ if (MRI.getRegClass(ToSGPR) == &AMDGPU::SReg_64RegClass)
+ MRI.setRegClass(ToSGPR, &AMDGPU::SReg_64_XEXEC_XNULLRegClass);
SAddr->setReg(ToSGPR);
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 694b3ec92fb781..ef9adde13348fe 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -858,8 +858,7 @@ def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1,
}
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
- SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
+ (add SReg_64_XEXEC_XNULL, SGPR_NULL64)> {
let CopyCost = 1;
let AllocationPriority = 1;
let HasSGPR = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index 8eef3d4ac4a3d6..07c97107d1b71f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -52,7 +52,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
@@ -146,7 +146,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX940-NEXT: {{ $}}
; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX940-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
@@ -240,7 +240,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32_xm0_xexec = SI_PS_LIVE
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
index 80fa24471a459f..49c5dc7ed5a966 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
@@ -120,7 +120,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
@@ -138,7 +138,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
index db508b5aea8c56..1317770ad834c7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
@@ -36,7 +36,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -48,7 +48,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
index f11196be89bb1d..a65fc6c0c4cfe4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
@@ -25,7 +25,7 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1)
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
index 6f0108317f5310..09eb77fcbdd9db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
@@ -747,7 +747,7 @@ body: |
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -758,7 +758,7 @@ body: |
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -854,7 +854,7 @@ body: |
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -865,7 +865,7 @@ body: |
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 3428230606080b..e07d635855cfe4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -25,7 +25,7 @@ body: |
; WAVE32-LABEL: name: copy
; WAVE32: liveins: $sgpr2_sgpr3
; WAVE32-NEXT: {{ $}}
- ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr2_sgpr3
; WAVE32-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
index 6a1e52cd29fd9f..52b1beb0b0594c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
@@ -26,8 +26,9 @@ body: |
; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: fract_f64_neg
@@ -44,8 +45,9 @@ body: |
; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX11-NEXT: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
@@ -92,8 +94,9 @@ body: |
; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: fract_f64_neg_abs
@@ -110,8 +113,9 @@ body: |
; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX11-NEXT: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
index cf4e6c8b85e3ee..65f6b8879e16fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
@@ -17,7 +17,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -25,7 +25,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -33,7 +33,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -41,7 +41,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -67,7 +67,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -75,7 +75,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -83,7 +83,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -91,7 +91,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -120,7 +120,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -128,7 +128,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -136,7 +136,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -144,7 +144,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -266,7 +266,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -300,7 +300,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -308,7 +308,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -339,7 +339,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -373,7 +373,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -381,7 +381,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -410,7 +410,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -418,7 +418,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -426,7 +426,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -434,7 +434,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -460,7 +460,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -468,7 +468,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -476,7 +476,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -484,7 +484,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4097, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -518,7 +518,7 @@ body: |
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -558,7 +558,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -4097, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -584,7 +584,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -592,7 +592,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -600,7 +600,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -608,7 +608,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -634,7 +634,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -658,7 +658,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -666,7 +666,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -691,7 +691,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -699,7 +699,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -707,7 +707,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -715,7 +715,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4286578688, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 8388607, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -830,7 +830,7 @@ body: |
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
index fff06e4ad16c07..386e34f72ab734 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
@@ -7,33 +7,33 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX906-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX906-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX906-NEXT: v_mov_b32_e32 v3, 8
+; GFX906-NEXT: v_mov_b32_e32 v4, 8
; GFX906-NEXT: v_mov_b32_e32 v5, 16
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
-; GFX906-NEXT: global_load_dword v4, v2, s[4:5]
+; GFX906-NEXT: global_load_dword v3, v2, s[4:5]
; GFX906-NEXT: v_mov_b32_e32 v1, 0xff
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v4
-; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX906-NEXT: v_lshlrev_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v4, v6, v7, v4
+; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v3
+; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX906-NEXT: v_or3_b32 v3, v6, v7, v3
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX906-NEXT: s_cbranch_execz .LBB0_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dword v0, v2, s[6:7]
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v2, 0xff, v0
-; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v4, v2, v3, v0
+; GFX906-NEXT: v_or3_b32 v3, v2, v3, v0
; GFX906-NEXT: .LBB0_2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v4
+; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v3
; GFX906-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
-; GFX906-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX906-NEXT: v_and_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX906-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX906-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir b/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir
index ffa9e643409d36..86a1a26cb7abfd 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir
@@ -12,7 +12,7 @@ body: |
; GFX908: liveins: $sgpr0_sgpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec
; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B32_e32_1]], implicit $exec
@@ -26,7 +26,7 @@ body: |
; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
; GFX908-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%13:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec
undef %11.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 %13:vgpr_32, implicit $exec
@@ -37,7 +37,7 @@ body: |
%9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
%10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %9:vgpr_32, %8:vgpr_32, %11:areg_128, 0, 0, 0, implicit $mode, implicit $exec
%12:vreg_128 = COPY %10:areg_128
- GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s128), addrspace 1)
S_ENDPGM 0
...
---
@@ -51,7 +51,7 @@ body: |
; GFX908: liveins: $sgpr0_sgpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub1:areg_128 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
@@ -64,7 +64,7 @@ body: |
; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
; GFX908-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
undef %11.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec
%11.sub1:areg_128 = COPY %11.sub0:areg_128
@@ -74,7 +74,7 @@ body: |
%9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
%10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %9:vgpr_32, %8:vgpr_32, %11:areg_128, 0, 0, 0, implicit $mode, implicit $exec
%12:vreg_128 = COPY %10:areg_128
- GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s128), addrspace 1)
S_ENDPGM 0
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
index 4f8255d93ef2c4..c85d9f408072f1 100644
--- a/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
+++ b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
@@ -24,7 +24,7 @@ body: |
%0:sgpr_64 = COPY killed $sgpr0_sgpr1
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 36, 0
+ %2:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 36, 0
%3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0(p4), 44, 0
%4:sreg_32 = S_ADD_I32 %3, 1, implicit-def dead $scc
%5:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index d62ff3795d8b8c..8a7762fb4b6c7f 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -43,7 +43,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX908-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX908-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; GFX908-NEXT: S_BRANCH %bb.1
@@ -105,7 +105,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX90A_GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: S_BRANCH %bb.1
@@ -167,7 +167,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX11_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX11_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX11_GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX11_GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX11_GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX11_GFX12-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32 = SI_PS_LIVE
; GFX11_GFX12-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; GFX11_GFX12-NEXT: S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 946ee9eb9c0653..1fb34abb41a2de 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -44,7 +44,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX90A-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -126,7 +126,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -208,7 +208,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32 = SI_PS_LIVE
; GFX11-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GFX11-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
index 4d62d30a38ed34..292722c2607add 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -8,16 +8,16 @@
define amdgpu_kernel void @s_input_output_i128() {
; GFX908-LABEL: name: s_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7340042 /* regdef:SGPR_128 */, def %11
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7405578 /* regdef:SGPR_128 */, def %11
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %11
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7340041 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7405577 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: s_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7340042 /* regdef:SGPR_128 */, def %9
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7405578 /* regdef:SGPR_128 */, def %9
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %9
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7340041 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7405577 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=s"()
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -27,16 +27,16 @@ define amdgpu_kernel void @s_input_output_i128() {
define amdgpu_kernel void @v_input_output_i128() {
; GFX908-LABEL: name: v_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %11
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %11
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %11
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:VReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: v_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %9
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %9
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %9
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6553609 /* reguse:VReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6619145 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=v"()
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -46,16 +46,16 @@ define amdgpu_kernel void @v_input_output_i128() {
define amdgpu_kernel void @a_input_output_i128() {
; GFX908-LABEL: name: a_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:AReg_128 */, def %11
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:AReg_128 */, def %11
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %11
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:AReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: a_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:AReg_128_Align2 */, def %9
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6488074 /* regdef:AReg_128_Align2 */, def %9
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %9
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:AReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = call i128 asm sideeffect "; def $0", "=a"()
call void asm sideeffect "; use $0", "a"(i128 %val)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir
index c3ead8bff360e3..179c9f4f8dc4d0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir
@@ -16,7 +16,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 44, 0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 [[S_LOAD_DWORD_IMM]]
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -24,7 +24,7 @@ body: |
; GCN-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %5:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 44, 0
%7:sgpr_32 = WAVE_REDUCE_UMAX_PSEUDO_U32 killed %6, 1, implicit $exec
%8:vgpr_32 = COPY %7
@@ -46,7 +46,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
@@ -77,7 +77,7 @@ body: |
liveins: $vgpr0, $sgpr0_sgpr1
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:sgpr_32 = WAVE_REDUCE_UMAX_PSEUDO_U32 %0, 1, implicit $exec
%7:vgpr_32 = COPY %6
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir
index 7664498c5149e8..88c35a6417d237 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir
@@ -16,7 +16,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 44, 0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 [[S_LOAD_DWORD_IMM]]
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -24,7 +24,7 @@ body: |
; GCN-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %5:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 44, 0
%7:sgpr_32 = WAVE_REDUCE_UMIN_PSEUDO_U32 killed %6, 1, implicit $exec
%8:vgpr_32 = COPY %7
@@ -46,7 +46,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967295
@@ -77,7 +77,7 @@ body: |
liveins: $vgpr0, $sgpr0_sgpr1
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:sgpr_32 = WAVE_REDUCE_UMIN_PSEUDO_U32 %0, 1, implicit $exec
%7:vgpr_32 = COPY %6
diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir
index 5c43cd24a686df..1afc24dcdfaa9f 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir
@@ -136,14 +136,14 @@ body: |
; GCN-LABEL: name: no_merge_flat_global_load_dword_saddr
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`)
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF1]], [[DEF]].sub0, 4, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[COPY]], implicit [[COPY1]]
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`, basealign 4)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 4, basealign 4, addrspace 1)
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 8, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 8, basealign 4, addrspace 1)
@@ -157,14 +157,14 @@ body: |
; GCN-LABEL: name: no_merge_global_saddr_flat_load_dword
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF1]], [[DEF]].sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef` + 4, align 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[COPY]], implicit [[COPY1]]
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
%3:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef` + 4)
%4:vgpr_32 = FLAT_LOAD_DWORD %0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef` + 8)
@@ -279,13 +279,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_flat_global_store_dword_saddr
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr undef`)
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF]].sub0, [[DEF3]], [[DEF1]], 4, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
FLAT_STORE_DWORD %0, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr undef`)
@@ -298,13 +298,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_saddr_flat_store_dword
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF]].sub0, [[DEF2]], [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF3]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr undef`)
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
GLOBAL_STORE_DWORD_SADDR %0.sub0, %2, %1, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
index ffa250f1c75b82..0b868c0e432715 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
@@ -235,13 +235,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_2
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -254,7 +254,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_3
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0_sub1
@@ -262,7 +262,7 @@ body: |
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 1, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -276,7 +276,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_4
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 2, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
@@ -286,7 +286,7 @@ body: |
; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 2, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 2, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -301,7 +301,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_6
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 4, 3, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
@@ -314,7 +314,7 @@ body: |
; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 3, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 8, 3, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -331,13 +331,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dwordx2_saddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub2_sub3
; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 8, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -350,12 +350,12 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_load_dword_and_global_load_dword_saddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD]], implicit [[GLOBAL_LOAD_DWORD_SADDR]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD %1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub0, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -386,12 +386,12 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_load_dword_saddr_different_vaddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub1, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[GLOBAL_LOAD_DWORD_SADDR1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub1, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -691,13 +691,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_2
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE]], [[DEF]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -711,7 +711,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_3
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -719,7 +719,7 @@ body: |
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
; GCN-NEXT: GLOBAL_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec :: (store (s96) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -735,7 +735,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_4
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -745,7 +745,7 @@ body: |
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -763,7 +763,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_6
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -777,7 +777,7 @@ body: |
; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -799,13 +799,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_store_dword_saddr_with_global_store_dword
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF1]], [[DEF3]], 4, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -819,13 +819,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_store_dword_saddr_different_vaddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub1, [[DEF3]], [[DEF]], 4, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
index 502a1c5e007836..f982833569c236 100644
--- a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
@@ -34,7 +34,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
@@ -63,7 +63,7 @@ body: |
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -82,10 +82,10 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
- %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ %2:sreg_64_xexec_xnull = S_AND_B64 %1, 1, implicit-def $scc
S_CMP_LG_U64 %2, 0, implicit-def $scc
S_CBRANCH_SCC1 %bb.1, implicit $scc
@@ -111,7 +111,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -130,7 +130,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, undef %3:vgpr_32, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
S_CMP_LG_U64 %2, 0, implicit-def $scc
@@ -174,7 +174,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vgpr_32 = IMPLICIT_DEF
GLOBAL_STORE_DWORD_SADDR %3, %4, %1, 0, 0, implicit $exec
@@ -203,7 +203,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -250,7 +250,7 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -314,7 +314,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
GLOBAL_ATOMIC_ADD_SADDR %3, %3, %1, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
@@ -359,7 +359,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vgpr_32 = GLOBAL_ATOMIC_ADD_SADDR_RTN %3, %3, %1, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll
index 1c38f8ffc89edc..d4c66f00ffde8d 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @add_reg_imm(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 28744523
@@ -30,7 +30,7 @@ define amdgpu_kernel void @add_reg_reg(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile load (s64) from %ir.ptr.load, addrspace 1)
@@ -53,7 +53,7 @@ define amdgpu_kernel void @sub_reg_imm(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -28744524
@@ -76,7 +76,7 @@ define amdgpu_kernel void @sub_imm_reg(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 28744523
@@ -99,7 +99,7 @@ define amdgpu_kernel void @sub_reg_reg(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile load (s64) from %ir.ptr.load, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
index 4630b0d7ef50ba..57f7ceb964d857 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @exp_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -27,7 +27,7 @@ define amdgpu_kernel void @exp_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -48,7 +48,7 @@ define amdgpu_kernel void @log_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -68,7 +68,7 @@ define amdgpu_kernel void @log_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -89,7 +89,7 @@ define amdgpu_kernel void @rcp_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -109,7 +109,7 @@ define amdgpu_kernel void @rcp_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -130,7 +130,7 @@ define amdgpu_kernel void @rsq_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -150,7 +150,7 @@ define amdgpu_kernel void @rsq_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -171,7 +171,7 @@ define amdgpu_kernel void @sqrt_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -191,7 +191,7 @@ define amdgpu_kernel void @sqrt_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
index 9607889c71793a..63ee27e0f83ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
@@ -10,7 +10,7 @@ body: |
bb.0:
%0:sreg_32 = IMPLICIT_DEF
%1:sreg_32_xm0_xexec = IMPLICIT_DEF
- %2:sreg_64_xexec = IMPLICIT_DEF
+ %2:sreg_64_xexec_xnull = IMPLICIT_DEF
%3:sgpr_32 = IMPLICIT_DEF
%4:sreg_32_xexec_hi = IMPLICIT_DEF
%5:sreg_32 = IMPLICIT_DEF
@@ -21,7 +21,7 @@ body: |
%10:sreg_32 = IMPLICIT_DEF
%11:sreg_32 = IMPLICIT_DEF
%12:sreg_64_xexec = IMPLICIT_DEF
- %13:sreg_64_xexec = IMPLICIT_DEF
+ %13:sreg_64_xexec_xnull = IMPLICIT_DEF
%14:sreg_32 = IMPLICIT_DEF
%15:sreg_32 = IMPLICIT_DEF
%16:sreg_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 58b61510c24e8b..72aafcaca3ff81 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX908-NEXT: {{ $}}
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %26
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %26
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %23
; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -36,7 +36,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
@@ -60,7 +60,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX90A-NEXT: {{ $}}
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %25
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %25
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %23
; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -83,7 +83,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index fdfc9b043cc9d2..c6ee557d970cd7 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -306,7 +306,7 @@ body: |
bb.3:
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- dead $sgpr30_sgpr31 = SI_CALL undef %24:sreg_64_xexec, 0, CustomRegMask($sgpr60,$sgpr62)
+ dead $sgpr30_sgpr31 = SI_CALL undef %24:sreg_64_xexec_xnull, 0, CustomRegMask($sgpr60,$sgpr62)
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
bb.4:
@@ -338,7 +338,7 @@ body: |
bb.9:
%31:vreg_64_align2 = COPY %19.sub16_sub17, implicit $exec
- GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, %31, undef %24:sreg_64_xexec, 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, %31, undef %24:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s64), addrspace 1)
%32:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %5, implicit $exec
dead %33:sreg_64_xexec = V_CMP_NE_U32_e64 1, %32, implicit $exec
undef %34.sub0:sreg_64 = S_ADD_U32 %15.sub0, 32, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
index 9c1b4af9c7fbab..3fdb0c7c0885b4 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
@@ -12,7 +12,7 @@ body: |
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -26,7 +26,7 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32))
; CHECK-NEXT: S_ENDPGM 0
bb.0:
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
index 6fa1e2b663fa9b..bdfc8227fdccb1 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
@@ -28,7 +28,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: no_sched_barrier
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -58,7 +58,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_0
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -69,7 +69,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -91,7 +91,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_1
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -102,7 +102,7 @@ body: |
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -123,7 +123,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_2
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -136,7 +136,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
@@ -159,7 +159,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_4
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -178,7 +178,7 @@ body: |
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -207,7 +207,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_8
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -226,7 +226,7 @@ body: |
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -255,7 +255,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_16
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -266,7 +266,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -287,7 +287,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_32
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -298,7 +298,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -319,7 +319,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_64
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -330,7 +330,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -449,7 +449,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_masks_8_12
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -470,7 +470,7 @@ body: |
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -501,7 +501,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_4_bundle
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -515,7 +515,7 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -539,7 +539,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_0_bundle
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -553,7 +553,7 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
index d6d89a63c22c25..d6774bb39dca72 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
@@ -18,7 +18,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_2_VMEM_10_ALU_5_MFMA_2_VMEM_WRITE
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -42,7 +42,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_2_VMEM_10_ALU_5_MFMA_2_VMEM_WRITE
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -64,7 +64,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 5, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -98,7 +98,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -130,7 +130,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -160,7 +160,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 6, 1, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -210,7 +210,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_2_separate_pipes
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -236,7 +236,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MUL_LO_U32_e64_3]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_2_separate_pipes
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -260,7 +260,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 2
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 2, 2
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MUL_LO_U32_e64_3]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -298,7 +298,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_3_separate_pipes
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -328,7 +328,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_3_separate_pipes
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -356,7 +356,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 1, 1
; EXACT-NEXT: SCHED_GROUP_BARRIER 16, 1, 1
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_3]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir
index 372e0fee3c8eae..4f844762b24e30 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir
@@ -18,7 +18,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: no_sched_group_barrier
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: no_sched_group_barrier
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -56,7 +56,7 @@ body: |
; EXACT-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
; EXACT-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -82,7 +82,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -108,7 +108,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -132,7 +132,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 2, 3, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -170,7 +170,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_group_barrier_2_VMEM_1000_ALU_5_MFMA_2_VMEM_WRITE
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -194,7 +194,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_2_VMEM_1000_ALU_5_MFMA_2_VMEM_WRITE
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -216,7 +216,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 5, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -250,7 +250,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -282,7 +282,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -312,7 +312,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 6, 1, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index 2a280bcda42f52..00baea8ed7a275 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -570,7 +570,7 @@ define protected amdgpu_kernel void @nested_waterfalls(ptr addrspace(1) %tex.coe
; SI-NEXT: bb.1.if.then:
; SI-NEXT: successors: %bb.2(0x80000000)
; SI-NEXT: {{ $}}
- ; SI-NEXT: early-clobber %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset, align 4, addrspace 4)
+ ; SI-NEXT: early-clobber %10:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM_ec killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset, align 4, addrspace 4)
; SI-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 3, killed [[COPY1]](s32), implicit $exec
; SI-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR killed %10, killed [[V_LSHLREV_B32_e64_]], 0, 0, implicit $exec :: (load (s64) from %ir.idx, addrspace 1)
; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_SADDR]], 16, 0, implicit $exec :: (invariant load (s128) from %ir.3 + 16, addrspace 4)
diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s
index b35ad87edeea35..2ce613b324e740 100644
--- a/llvm/test/MC/AMDGPU/flat-global.s
+++ b/llvm/test/MC/AMDGPU/flat-global.s
@@ -210,7 +210,6 @@ global_store_dword v3, v1, s[2:3] offset:-8
// GFX9: global_store_dword v3, v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// XXX: Is this valid?
global_store_dword v3, v1, exec
// GFX10-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX9-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt
index f8d31294ee9cc4..7e5366b80fbacc 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt
@@ -761,9 +761,6 @@
# GFX10: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00]
0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00
-# GFX10: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
-0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00
-
# GFX10: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00
>From fe8ea96ffaa015214875b738020150feeb0ecfcf Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Fri, 23 Aug 2024 10:27:20 -0700
Subject: [PATCH 3/7] Use declared reg class for SADDR instead of hard-code.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 ++++++--
llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir | 4 ++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index de2401afdfbde5..a4cbecb4b4427e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6245,8 +6245,12 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
return;
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
- if (MRI.getRegClass(ToSGPR) == &AMDGPU::SReg_64RegClass)
- MRI.setRegClass(ToSGPR, &AMDGPU::SReg_64_XEXEC_XNULLRegClass);
+
+ const TargetRegisterClass *DeclaredRC =
+ getRegClass(MI.getDesc(), SAddr->getOperandNo(),
+ MRI.getTargetRegisterInfo(), *MI.getParent()->getParent());
+
+ MRI.setRegClass(ToSGPR, DeclaredRC);
SAddr->setReg(ToSGPR);
}
diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
index f982833569c236..c1c5afcd3e96db 100644
--- a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
@@ -203,7 +203,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -250,7 +250,7 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
>From e4b7aadb85d2e2230e8ddf640b75f30e4511b19c Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Fri, 23 Aug 2024 11:47:04 -0700
Subject: [PATCH 4/7] Resolve merge conflicts.
---
.../AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index 89c3bbc83018e7..6064b17f5f8f36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -61,7 +61,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX908-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX908-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -136,7 +136,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX90A-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -211,7 +211,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX940-NEXT: {{ $}}
; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -286,7 +286,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32_xm0_xexec = SI_PS_LIVE
>From c417aafe7b06f36d6c82a0e5642d9cca0155e9bb Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Thu, 19 Sep 2024 11:17:14 -0700
Subject: [PATCH 5/7] Pass desired reg class to readlaneVGPRToSGPR() so that it
can get the correct SGPR class.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 12 +++++++-----
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 ++-
2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a4cbecb4b4427e..dc58e199efb703 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6088,10 +6088,14 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
legalizeOpWithMove(MI, VOP3Idx[2]);
}
-Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
- MachineRegisterInfo &MRI) const {
+Register SIInstrInfo::readlaneVGPRToSGPR(
+ Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI,
+ const TargetRegisterClass *DstRC /*=nullptr*/) const {
const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
+ if (DstRC)
+ SRC = ST.getRegisterInfo()->getCommonSubClass(SRC, DstRC);
+
Register DstReg = MRI.createVirtualRegister(SRC);
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
@@ -6244,13 +6248,11 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
if (moveFlatAddrToVGPR(MI))
return;
- Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
-
const TargetRegisterClass *DeclaredRC =
getRegClass(MI.getDesc(), SAddr->getOperandNo(),
MRI.getTargetRegisterInfo(), *MI.getParent()->getParent());
- MRI.setRegClass(ToSGPR, DeclaredRC);
+ Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI, DeclaredRC);
SAddr->setReg(ToSGPR);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index d560792aa1a894..cbf1a2efab1049 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1218,7 +1218,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
/// threads in the wave.
/// \returns The SGPR register that \p SrcReg was copied to.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
- MachineRegisterInfo &MRI) const;
+ MachineRegisterInfo &MRI,
+ const TargetRegisterClass *DstRC = nullptr) const;
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
>From ed7cfd5c74c97963b99ff4d5e0e6bd8aab019977 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Mon, 23 Sep 2024 11:53:42 -0700
Subject: [PATCH 6/7] Minor code changes.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 7 ++++---
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index dc58e199efb703..599d906302c929 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6094,7 +6094,7 @@ Register SIInstrInfo::readlaneVGPRToSGPR(
const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
if (DstRC)
- SRC = ST.getRegisterInfo()->getCommonSubClass(SRC, DstRC);
+ SRC = RI.getCommonSubClass(SRC, DstRC);
Register DstReg = MRI.createVirtualRegister(SRC);
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index cbf1a2efab1049..f7554906a9c98b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1213,9 +1213,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
/// Fix operands in \p MI to satisfy constant bus requirements.
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
- /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
- /// be used when it is know that the value in SrcReg is same across all
- /// threads in the wave.
+ /// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class
+ /// for the dst register (\p DstRC) can be optionally supplied. This function
+ /// can only be used when it is know that the value in SrcReg is same across
+ /// all threads in the wave.
/// \returns The SGPR register that \p SrcReg was copied to.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
MachineRegisterInfo &MRI,
>From 6032727b0619d938aa5485fa8a432513e18cc2c6 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Mon, 23 Sep 2024 12:32:02 -0700
Subject: [PATCH 7/7] Minor code change.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 599d906302c929..c787edf7cfd11b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6248,9 +6248,8 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
if (moveFlatAddrToVGPR(MI))
return;
- const TargetRegisterClass *DeclaredRC =
- getRegClass(MI.getDesc(), SAddr->getOperandNo(),
- MRI.getTargetRegisterInfo(), *MI.getParent()->getParent());
+ const TargetRegisterClass *DeclaredRC = getRegClass(
+ MI.getDesc(), SAddr->getOperandNo(), &RI, *MI.getParent()->getParent());
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI, DeclaredRC);
SAddr->setReg(ToSGPR);
More information about the llvm-commits
mailing list