[llvm] [AMDGPU][MC] Disallow null as saddr in flat instructions (PR #101730)
Jun Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 10:28:27 PDT 2024
https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/101730
>From 6123007a1197a52d05688cc573c9bc06a3e14753 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Fri, 2 Aug 2024 11:13:32 -0700
Subject: [PATCH 1/3] [AMDGPU][MC] Disallow null as saddr in flat instructions
Some flat instructions have an saddr operand. When 'null' is provided
as saddr, it may have the same encoding as another instruction. For
example, the instructions 'global_atomic_add v1, v2, null' and
'global_atomic_add v[1:2], v2, off' have the same encoding. This patch
disallows having null as saddr.
---
.../Disassembler/AMDGPUDisassembler.cpp | 1 +
llvm/lib/Target/AMDGPU/FLATInstructions.td | 8 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 8 +
llvm/test/MC/AMDGPU/flat-global.s | 4 +-
.../MC/AMDGPU/gfx10_flat_instructions_err.s | 268 ++++++++++++++++
.../MC/AMDGPU/gfx11_flat_instructions_err.s | 253 +++++++++++++++
.../MC/AMDGPU/gfx12_flat_instructions_err.s | 289 ++++++++++++++++++
7 files changed, 825 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
create mode 100644 llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
create mode 100644 llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1a0dc7098347ac..9bfb62e879ae5c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -293,6 +293,7 @@ DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_64_XEXEC_XNULL, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 351563657aeb55..b4615f43ec8cea 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -209,7 +209,7 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
!con(
!con(
!if(EnableSaddr,
- (ins SReg_64:$saddr, VGPR_32:$vaddr),
+ (ins SReg_64_XEXEC_XNULL:$saddr, VGPR_32:$vaddr),
(ins VReg_64:$vaddr)),
(ins flat_offset:$offset)),
// FIXME: Operands with default values do not work with following non-optional operands.
@@ -231,7 +231,7 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
(outs),
!con(
!if(EnableSaddr,
- (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
+ (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64_XEXEC_XNULL:$saddr),
(ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
(ins flat_offset:$offset, CPol_0:$cpol)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
@@ -589,7 +589,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
+ (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName> {
let has_saddr = 1;
@@ -620,7 +620,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
- (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
+ (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName#"_rtn"> {
let has_saddr = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 519048356f764d..0a1a40a305ba26 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -849,6 +849,14 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
let HasSGPR = 1;
}
+def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
+ SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
+ let CopyCost = 1;
+ let AllocationPriority = 1;
+ let HasSGPR = 1;
+}
+
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s
index e81fae86b05583..b35ad87edeea35 100644
--- a/llvm/test/MC/AMDGPU/flat-global.s
+++ b/llvm/test/MC/AMDGPU/flat-global.s
@@ -212,8 +212,8 @@ global_store_dword v3, v1, s[2:3] offset:-8
// XXX: Is this valid?
global_store_dword v3, v1, exec
-// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
-// GFX9: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
+// GFX10-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX9-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
// VI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: instruction not supported on this GPU
global_load_dword v1, v[3:4], s2
diff --git a/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
new file mode 100644
index 00000000000000..193e91e1d0bb13
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
@@ -0,0 +1,268 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX1010,GFX10 --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefixes=GFX1030,GFX10 --implicit-check-not=error: %s
+
+global_atomic_add v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_add v0, v2, v4, null glc
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_add_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_add_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_and v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_and v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_and_x2 v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_cmpswap v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_cmpswap v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_x2 v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+global_atomic_cmpswap_x2 v[0:1], v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:46: error: invalid operand for instruction
+
+global_atomic_csub v2, v4, null
+// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1030: :[[@LINE-2]]:28: error: invalid operand for instruction
+
+global_atomic_csub v0, v2, v4, null
+// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1030: :[[@LINE-2]]:32: error: invalid operand for instruction
+
+global_atomic_dec v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_dec v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_dec_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_fcmpswap v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_fcmpswap v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+global_atomic_fcmpswap_x2 v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_fcmpswap_x2 v[0:1], v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_fmax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_fmax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_fmax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_fmax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_fmin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_fmin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_fmin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_fmin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_inc v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_inc_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_or v2, v4, null
+// GFX10: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_atomic_or v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_atomic_or_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:41: error: invalid operand for instruction
+
+global_atomic_smax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_smax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_smax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_smax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_smin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_smin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_smin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_smin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_sub v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_sub v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_sub_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_sub_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_swap v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_swap v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_swap_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_swap_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_umax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_umax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_umax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_umax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_umin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_umin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_umin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_umin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_xor v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_xor v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_xor_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_xor_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_load_dword v0, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_dwordx2 v[0:1], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_dwordx3 v[0:2], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_dwordx4 v[0:3], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_sbyte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_sbyte_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_sbyte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_short_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_short_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_sshort v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_ubyte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_ubyte_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_ubyte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_ushort v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_byte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_store_byte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dword v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_dwordx2 v0, v[2:3], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dwordx3 v0, v[2:4], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dwordx4 v0, v[2:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_short v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_short_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
new file mode 100644
index 00000000000000..9e51b0b8cc8cda
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
@@ -0,0 +1,253 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefixes=GFX11 --implicit-check-not=error: %s
+
+global_atomic_add_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_f32 v0, v2, v4, null glc
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, v4, null glc
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v0, v[2:5], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v[0:1], v2, v[4:7], null
+// GFX11: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_cmpswap_f32 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_f32 v0, v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_csub_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_csub_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_f32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_f32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:44: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_load_b128 v[0:3], v4, null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_load_b32 v0, v4, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_b64 v[0:1], v4, null
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_b96 v[0:2], v4, null
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_d16_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_d16_hi_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_load_d16_hi_i8 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_hi_u8 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_i8 v0, v2, null
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_d16_u8 v0, v2, null
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_i16 v0, v2, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_i8 v0, v2, null
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_load_u16 v0, v2, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_u8 v0, v2, null
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_store_b128 v0, v[2:5], null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_store_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_b8 v0, v2, null
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_store_b96 v0, v[2:4], null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_d16_hi_b16 v0, v2, null
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_store_d16_hi_b8 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s
new file mode 100644
index 00000000000000..5e31002d0d92ba
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s
@@ -0,0 +1,289 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=GFX12 --implicit-check-not=error: %s
+
+global_atomic_add_f32 v0, v2, null
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_f32 v0, v2, v4, null glc
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, v4, null glc
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v0, v[2:5], null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v[0:1], v2, v[4:7], null
+// GFX12: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_cond_sub_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_cond_sub_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_num_f32 v0, v2, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_num_f32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_num_f32 v0, v2, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_num_f32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_ordered_add_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_ordered_add_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:51: error: invalid operand for instruction
+
+global_atomic_pk_add_bf16 v0, v2, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_pk_add_bf16 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_pk_add_f16 v0, v2, null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_pk_add_f16 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+global_atomic_sub_clamp_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+global_atomic_sub_clamp_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:41: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_sub_u32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_sub_u64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_swap_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_swap_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:44: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_xor_b32 v0, v2, v4, null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_xor_b64 v[0:1], v2, v[4:5], null
+// GFX12: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_load_b128 v[0:3], v4, null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_load_b32 v0, v4, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_b64 v[0:1], v4, null
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_b96 v[0:2], v4, null
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_block v[0:31], v32, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_d16_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
+
+global_load_d16_hi_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_load_d16_hi_i8 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_hi_u8 v0, v2, null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_d16_i8 v0, v2, null
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_d16_u8 v0, v2, null
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_i16 v0, v2, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_i8 v0, v2, null
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_load_tr_b128 v[0:3], v4, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_tr_b128 v[0:1], v4, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_tr_b64 v[0:1], v4, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_load_tr_b64 v0, v4, null
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_u16 v0, v2, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_load_u8 v0, v2, null
+// GFX12: :[[@LINE-1]]:24: error: invalid operand for instruction
+
+global_store_b128 v0, v[2:5], null
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_store_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b32 v0, v2, null
+// GFX12: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_store_b64 v0, v[2:3], null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_b8 v0, v2, null
+// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction
+
+global_store_b96 v0, v[2:4], null
+// GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_store_block v32, v[0:31], null
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_d16_hi_b16 v0, v2, null
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_store_d16_hi_b8 v0, v2, null
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
>From e090426914f51bcad1470cd9855daf57cd1bf71f Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Tue, 13 Aug 2024 17:33:11 -0700
Subject: [PATCH 2/3] This commit: (1) define SReg_64_XEXEC in terms of
SReg_64_XEXEC_XNULL (2) fix tests affected by the new reg class. Todo: in
final commit msg, mention EXEC as well.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 3 +-
.../global-atomic-fadd.f32-no-rtn.ll | 10 +--
.../GlobalISel/global-atomic-fadd.f32-rtn.ll | 14 ++--
.../GlobalISel/global-atomic-fadd.f64.ll | 12 +--
.../global-atomic-fadd.v2f16-no-rtn.ll | 12 +--
.../global-atomic-fadd.v2f16-rtn.ll | 6 +-
...st-select-amdgpu-atomic-cmpxchg-global.mir | 8 +-
.../AMDGPU/GlobalISel/inst-select-copy.mir | 2 +-
.../GlobalISel/inst-select-fract.f64.mir | 12 ++-
.../inst-select-load-global-saddr.mir | 80 +++++++++----------
.../AMDGPU/GlobalISel/vni8-across-blocks.ll | 22 ++---
.../test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir | 12 +--
.../CodeGen/AMDGPU/expand-si-indirect.mir | 2 +-
.../AMDGPU/global-atomic-fadd.f32-no-rtn.ll | 6 +-
.../AMDGPU/global-atomic-fadd.f32-rtn.ll | 6 +-
llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll | 24 +++---
.../AMDGPU/llvm.amdgcn.wave.reduce.umax.mir | 8 +-
.../AMDGPU/llvm.amdgcn.wave.reduce.umin.mir | 8 +-
.../merge-flat-with-global-load-store.mir | 16 ++--
.../AMDGPU/merge-global-load-store.mir | 52 ++++++------
.../CodeGen/AMDGPU/move-load-addr-to-valu.mir | 22 ++---
.../CodeGen/AMDGPU/move-to-valu-addsubu64.ll | 10 +--
.../move-to-valu-pseudo-scalar-trans.ll | 20 ++---
...ask-pre-ra-non-empty-but-used-interval.mir | 4 +-
...al-regcopy-and-spill-missed-at-regalloc.ll | 8 +-
.../ran-out-of-sgprs-allocation-failure.mir | 4 +-
.../AMDGPU/sched-barrier-hang-weak-dep.mir | 4 +-
.../CodeGen/AMDGPU/sched-barrier-pre-RA.mir | 48 +++++------
.../sched-group-barrier-pipeline-solver.mir | 24 +++---
.../AMDGPU/sched-group-barrier-pre-RA.mir | 24 +++---
llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll | 2 +-
llvm/test/MC/AMDGPU/flat-global.s | 1 -
.../MC/Disassembler/AMDGPU/gfx10_flat.txt | 3 -
34 files changed, 246 insertions(+), 245 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8af5c364509f0e..185bc1306948fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6215,6 +6215,8 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
return;
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
+ if (MRI.getRegClass(ToSGPR) == &AMDGPU::SReg_64RegClass)
+ MRI.setRegClass(ToSGPR, &AMDGPU::SReg_64_XEXEC_XNULLRegClass);
SAddr->setReg(ToSGPR);
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 0a1a40a305ba26..edbba1b27b9a88 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -858,8 +858,7 @@ def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1,
}
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
- SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
+ (add SReg_64_XEXEC_XNULL, SGPR_NULL64)> {
let CopyCost = 1;
let AllocationPriority = 1;
let HasSGPR = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index 3f0b86c271538a..bc7865d503771c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -38,7 +38,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(ptr addrspa
; GFX908_GFX11-NEXT: {{ $}}
; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -50,7 +50,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(ptr addrspa
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -92,7 +92,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(ptr ad
; GFX908_GFX11-NEXT: {{ $}}
; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -104,7 +104,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(ptr ad
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -147,7 +147,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index 2dc40c38343a92..cc263f75b34336 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -39,7 +39,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(ptr addrspace
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -52,7 +52,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(ptr addrspace
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -97,7 +97,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(ptr addr
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -110,7 +110,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(ptr addr
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1)
@@ -156,7 +156,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
@@ -249,7 +249,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX940-NEXT: {{ $}}
; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX940-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
@@ -342,7 +342,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32_xm0_xexec = SI_PS_LIVE
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
index 8007cc1f54a790..6968815e2e5927 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
@@ -49,7 +49,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(ptr addrspa
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
@@ -67,7 +67,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(ptr addrspac
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
@@ -131,7 +131,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(ptr ad
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
@@ -149,7 +149,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr add
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
@@ -284,7 +284,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
@@ -302,7 +302,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
index ace66e6a234aae..56ce30cf94f845 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
@@ -36,7 +36,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(ptr addrs
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -48,7 +48,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(ptr addrs
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -90,7 +90,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(ptr
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -102,7 +102,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(ptr
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -144,7 +144,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -156,7 +156,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
index e895d6f18c9791..e39d4ab66cc5ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
@@ -25,7 +25,7 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(ptr ad
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -58,7 +58,7 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(p
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -91,7 +91,7 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1)
; GFX90A_GFX940-NEXT: {{ $}}
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
index 6f0108317f5310..09eb77fcbdd9db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
@@ -747,7 +747,7 @@ body: |
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -758,7 +758,7 @@ body: |
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -854,7 +854,7 @@ body: |
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -865,7 +865,7 @@ body: |
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 3428230606080b..e07d635855cfe4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -25,7 +25,7 @@ body: |
; WAVE32-LABEL: name: copy
; WAVE32: liveins: $sgpr2_sgpr3
; WAVE32-NEXT: {{ $}}
- ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr2_sgpr3
; WAVE32-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
index 6a1e52cd29fd9f..52b1beb0b0594c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
@@ -26,8 +26,9 @@ body: |
; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: fract_f64_neg
@@ -44,8 +45,9 @@ body: |
; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX11-NEXT: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
@@ -92,8 +94,9 @@ body: |
; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX10-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: fract_f64_neg_abs
@@ -110,8 +113,9 @@ body: |
; GFX11-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[V_ADD_F64_e64_]], 1, [[V_FLOOR_F64_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[COPY1]]
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ ; GFX11-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_ADD_F64_e64_1]], [[COPY5]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
; GFX11-NEXT: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
index cf4e6c8b85e3ee..65f6b8879e16fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
@@ -17,7 +17,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -25,7 +25,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -33,7 +33,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -41,7 +41,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -67,7 +67,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -75,7 +75,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -83,7 +83,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -91,7 +91,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -120,7 +120,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -128,7 +128,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -136,7 +136,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -144,7 +144,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -266,7 +266,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -300,7 +300,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -308,7 +308,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -339,7 +339,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -373,7 +373,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX11: liveins: $sgpr0_sgpr1, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -381,7 +381,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX12: liveins: $sgpr0_sgpr1, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -410,7 +410,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -418,7 +418,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -426,7 +426,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -434,7 +434,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4096, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -460,7 +460,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -468,7 +468,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -476,7 +476,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -484,7 +484,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4097, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -518,7 +518,7 @@ body: |
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -558,7 +558,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -4097, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -584,7 +584,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -592,7 +592,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -600,7 +600,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -608,7 +608,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -634,7 +634,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -658,7 +658,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -666,7 +666,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -691,7 +691,7 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX9: liveins: $sgpr0_sgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -699,7 +699,7 @@ body: |
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -707,7 +707,7 @@ body: |
; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -715,7 +715,7 @@ body: |
; GFX12-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4286578688, implicit $exec
; GFX12-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 8388607, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
@@ -830,7 +830,7 @@ body: |
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
index 1f1c2659e81103..58cfcef4b7b442 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
@@ -7,33 +7,33 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX906-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX906-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX906-NEXT: v_mov_b32_e32 v3, 8
+; GFX906-NEXT: v_mov_b32_e32 v4, 8
; GFX906-NEXT: v_mov_b32_e32 v5, 16
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
-; GFX906-NEXT: global_load_dword v4, v2, s[4:5]
+; GFX906-NEXT: global_load_dword v3, v2, s[4:5]
; GFX906-NEXT: v_mov_b32_e32 v1, 0xff
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v4
-; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX906-NEXT: v_lshlrev_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v4, v6, v7, v4
+; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v3
+; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX906-NEXT: v_or3_b32 v3, v6, v7, v3
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX906-NEXT: s_cbranch_execz .LBB0_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dword v0, v2, s[6:7]
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v2, 0xff, v0
-; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX906-NEXT: v_or3_b32 v4, v2, v3, v0
+; GFX906-NEXT: v_or3_b32 v3, v2, v3, v0
; GFX906-NEXT: .LBB0_2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v4
+; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v3
; GFX906-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
-; GFX906-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX906-NEXT: v_and_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX906-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX906-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir b/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir
index ffa9e643409d36..86a1a26cb7abfd 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir
@@ -12,7 +12,7 @@ body: |
; GFX908: liveins: $sgpr0_sgpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec
; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B32_e32_1]], implicit $exec
@@ -26,7 +26,7 @@ body: |
; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
; GFX908-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%13:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec
undef %11.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 %13:vgpr_32, implicit $exec
@@ -37,7 +37,7 @@ body: |
%9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
%10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %9:vgpr_32, %8:vgpr_32, %11:areg_128, 0, 0, 0, implicit $mode, implicit $exec
%12:vreg_128 = COPY %10:areg_128
- GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s128), addrspace 1)
S_ENDPGM 0
...
---
@@ -51,7 +51,7 @@ body: |
; GFX908: liveins: $sgpr0_sgpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec
; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub1:areg_128 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
@@ -64,7 +64,7 @@ body: |
; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
; GFX908-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
undef %11.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec
%11.sub1:areg_128 = COPY %11.sub0:areg_128
@@ -74,7 +74,7 @@ body: |
%9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
%10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %9:vgpr_32, %8:vgpr_32, %11:areg_128, 0, 0, 0, implicit $mode, implicit $exec
%12:vreg_128 = COPY %10:areg_128
- GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s128), addrspace 1)
S_ENDPGM 0
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
index 4f8255d93ef2c4..c85d9f408072f1 100644
--- a/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
+++ b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
@@ -24,7 +24,7 @@ body: |
%0:sgpr_64 = COPY killed $sgpr0_sgpr1
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 36, 0
+ %2:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 36, 0
%3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0(p4), 44, 0
%4:sreg_32 = S_ADD_I32 %3, 1, implicit-def dead $scc
%5:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index c1b333c6927497..1ea4195199ab31 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -155,7 +155,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX908-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX908-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; GFX908-NEXT: S_BRANCH %bb.1
@@ -216,7 +216,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX90A_GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: S_BRANCH %bb.1
@@ -277,7 +277,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX11_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX11_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX11_GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX11_GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX11_GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX11_GFX12-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32 = SI_PS_LIVE
; GFX11_GFX12-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; GFX11_GFX12-NEXT: S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 1b955eae891590..f8178fabf9c3ca 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -164,7 +164,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX90A-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GFX90A-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -245,7 +245,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
; GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -326,7 +326,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE]]
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]]
; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32 = SI_PS_LIVE
; GFX11-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GFX11-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
index 4d62d30a38ed34..292722c2607add 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -8,16 +8,16 @@
define amdgpu_kernel void @s_input_output_i128() {
; GFX908-LABEL: name: s_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7340042 /* regdef:SGPR_128 */, def %11
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7405578 /* regdef:SGPR_128 */, def %11
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %11
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7340041 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7405577 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: s_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7340042 /* regdef:SGPR_128 */, def %9
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7405578 /* regdef:SGPR_128 */, def %9
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %9
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7340041 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7405577 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=s"()
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -27,16 +27,16 @@ define amdgpu_kernel void @s_input_output_i128() {
define amdgpu_kernel void @v_input_output_i128() {
; GFX908-LABEL: name: v_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %11
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %11
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %11
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:VReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: v_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %9
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %9
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %9
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6553609 /* reguse:VReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6619145 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=v"()
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -46,16 +46,16 @@ define amdgpu_kernel void @v_input_output_i128() {
define amdgpu_kernel void @a_input_output_i128() {
; GFX908-LABEL: name: a_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:AReg_128 */, def %11
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:AReg_128 */, def %11
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %11
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:AReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: a_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:AReg_128_Align2 */, def %9
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6488074 /* regdef:AReg_128_Align2 */, def %9
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %9
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:AReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = call i128 asm sideeffect "; def $0", "=a"()
call void asm sideeffect "; use $0", "a"(i128 %val)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir
index c3ead8bff360e3..179c9f4f8dc4d0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir
@@ -16,7 +16,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 44, 0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 [[S_LOAD_DWORD_IMM]]
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -24,7 +24,7 @@ body: |
; GCN-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %5:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 44, 0
%7:sgpr_32 = WAVE_REDUCE_UMAX_PSEUDO_U32 killed %6, 1, implicit $exec
%8:vgpr_32 = COPY %7
@@ -46,7 +46,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
@@ -77,7 +77,7 @@ body: |
liveins: $vgpr0, $sgpr0_sgpr1
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:sgpr_32 = WAVE_REDUCE_UMAX_PSEUDO_U32 %0, 1, implicit $exec
%7:vgpr_32 = COPY %6
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir
index 7664498c5149e8..88c35a6417d237 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir
@@ -16,7 +16,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 44, 0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 [[S_LOAD_DWORD_IMM]]
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
@@ -24,7 +24,7 @@ body: |
; GCN-NEXT: S_ENDPGM 0
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %5:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 44, 0
%7:sgpr_32 = WAVE_REDUCE_UMIN_PSEUDO_U32 killed %6, 1, implicit $exec
%8:vgpr_32 = COPY %7
@@ -46,7 +46,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967295
@@ -77,7 +77,7 @@ body: |
liveins: $vgpr0, $sgpr0_sgpr1
%1:sgpr_64(p4) = COPY $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
+ %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1(p4), 36, 0
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:sgpr_32 = WAVE_REDUCE_UMIN_PSEUDO_U32 %0, 1, implicit $exec
%7:vgpr_32 = COPY %6
diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir
index 5c43cd24a686df..1afc24dcdfaa9f 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir
@@ -136,14 +136,14 @@ body: |
; GCN-LABEL: name: no_merge_flat_global_load_dword_saddr
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`)
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF1]], [[DEF]].sub0, 4, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[COPY]], implicit [[COPY1]]
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`, basealign 4)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 4, basealign 4, addrspace 1)
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 8, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 8, basealign 4, addrspace 1)
@@ -157,14 +157,14 @@ body: |
; GCN-LABEL: name: no_merge_global_saddr_flat_load_dword
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF1]], [[DEF]].sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef` + 4, align 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[COPY]], implicit [[COPY1]]
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
%3:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef` + 4)
%4:vgpr_32 = FLAT_LOAD_DWORD %0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef` + 8)
@@ -279,13 +279,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_flat_global_store_dword_saddr
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr undef`)
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF]].sub0, [[DEF3]], [[DEF1]], 4, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
FLAT_STORE_DWORD %0, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr undef`)
@@ -298,13 +298,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_saddr_flat_store_dword
; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF]].sub0, [[DEF2]], [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF3]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr undef`)
%0:vreg_64_align2 = IMPLICIT_DEF
- %1:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
GLOBAL_STORE_DWORD_SADDR %0.sub0, %2, %1, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
index ffa250f1c75b82..0b868c0e432715 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
@@ -235,13 +235,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_2
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -254,7 +254,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_3
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0_sub1
@@ -262,7 +262,7 @@ body: |
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 1, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -276,7 +276,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_4
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 2, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
@@ -286,7 +286,7 @@ body: |
; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 2, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 2, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -301,7 +301,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dword_saddr_6
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 4, 3, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
@@ -314,7 +314,7 @@ body: |
; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 3, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 8, 3, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -331,13 +331,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_load_dwordx2_saddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub2_sub3
; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 8, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -350,12 +350,12 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_load_dword_and_global_load_dword_saddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD]], implicit [[GLOBAL_LOAD_DWORD_SADDR]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD %1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub0, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -386,12 +386,12 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_load_dword_saddr_different_vaddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub1, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[GLOBAL_LOAD_DWORD_SADDR1]]
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub1, 4, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
@@ -691,13 +691,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_2
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE]], [[DEF]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -711,7 +711,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_3
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -719,7 +719,7 @@ body: |
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
; GCN-NEXT: GLOBAL_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec :: (store (s96) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -735,7 +735,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_4
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -745,7 +745,7 @@ body: |
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -763,7 +763,7 @@ body: |
bb.0.entry:
; GCN-LABEL: name: merge_global_store_dword_saddr_6
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -777,7 +777,7 @@ body: |
; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1)
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -799,13 +799,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_store_dword_saddr_with_global_store_dword
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF1]], [[DEF3]], 4, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
@@ -819,13 +819,13 @@ body: |
bb.0.entry:
; GCN-LABEL: name: no_merge_global_store_dword_saddr_different_vaddr
- ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub1, [[DEF3]], [[DEF]], 4, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- %0:sreg_64_xexec = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vreg_64_align2 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
index 502a1c5e007836..f982833569c236 100644
--- a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
@@ -34,7 +34,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
@@ -63,7 +63,7 @@ body: |
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -82,10 +82,10 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
- %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
+ %2:sreg_64_xexec_xnull = S_AND_B64 %1, 1, implicit-def $scc
S_CMP_LG_U64 %2, 0, implicit-def $scc
S_CBRANCH_SCC1 %bb.1, implicit $scc
@@ -111,7 +111,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -130,7 +130,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, undef %3:vgpr_32, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
S_CMP_LG_U64 %2, 0, implicit-def $scc
@@ -174,7 +174,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vgpr_32 = IMPLICIT_DEF
GLOBAL_STORE_DWORD_SADDR %3, %4, %1, 0, 0, implicit $exec
@@ -203,7 +203,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -250,7 +250,7 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -314,7 +314,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
GLOBAL_ATOMIC_ADD_SADDR %3, %3, %1, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
@@ -359,7 +359,7 @@ body: |
%0:sreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
+ %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vgpr_32 = GLOBAL_ATOMIC_ADD_SADDR_RTN %3, %3, %1, 0, 0, implicit $exec
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll
index 1c38f8ffc89edc..d4c66f00ffde8d 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @add_reg_imm(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 28744523
@@ -30,7 +30,7 @@ define amdgpu_kernel void @add_reg_reg(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile load (s64) from %ir.ptr.load, addrspace 1)
@@ -53,7 +53,7 @@ define amdgpu_kernel void @sub_reg_imm(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -28744524
@@ -76,7 +76,7 @@ define amdgpu_kernel void @sub_imm_reg(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 28744523
@@ -99,7 +99,7 @@ define amdgpu_kernel void @sub_reg_reg(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s64) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile load (s64) from %ir.ptr.load, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
index 4630b0d7ef50ba..57f7ceb964d857 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @exp_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -27,7 +27,7 @@ define amdgpu_kernel void @exp_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -48,7 +48,7 @@ define amdgpu_kernel void @log_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -68,7 +68,7 @@ define amdgpu_kernel void @log_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -89,7 +89,7 @@ define amdgpu_kernel void @rcp_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -109,7 +109,7 @@ define amdgpu_kernel void @rcp_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -130,7 +130,7 @@ define amdgpu_kernel void @rsq_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -150,7 +150,7 @@ define amdgpu_kernel void @rsq_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -171,7 +171,7 @@ define amdgpu_kernel void @sqrt_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -191,7 +191,7 @@ define amdgpu_kernel void @sqrt_f16(ptr addrspace(1) %ptr) {
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.ptr.kernarg.offset, align 4, addrspace 4)
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
index 9607889c71793a..63ee27e0f83ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
@@ -10,7 +10,7 @@ body: |
bb.0:
%0:sreg_32 = IMPLICIT_DEF
%1:sreg_32_xm0_xexec = IMPLICIT_DEF
- %2:sreg_64_xexec = IMPLICIT_DEF
+ %2:sreg_64_xexec_xnull = IMPLICIT_DEF
%3:sgpr_32 = IMPLICIT_DEF
%4:sreg_32_xexec_hi = IMPLICIT_DEF
%5:sreg_32 = IMPLICIT_DEF
@@ -21,7 +21,7 @@ body: |
%10:sreg_32 = IMPLICIT_DEF
%11:sreg_32 = IMPLICIT_DEF
%12:sreg_64_xexec = IMPLICIT_DEF
- %13:sreg_64_xexec = IMPLICIT_DEF
+ %13:sreg_64_xexec_xnull = IMPLICIT_DEF
%14:sreg_32 = IMPLICIT_DEF
%15:sreg_32 = IMPLICIT_DEF
%16:sreg_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 58b61510c24e8b..72aafcaca3ff81 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX908-NEXT: {{ $}}
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %26
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %26
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %23
; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -36,7 +36,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
@@ -60,7 +60,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX90A-NEXT: {{ $}}
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %25
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %25
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %23
; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -83,7 +83,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index fdfc9b043cc9d2..c6ee557d970cd7 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -306,7 +306,7 @@ body: |
bb.3:
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- dead $sgpr30_sgpr31 = SI_CALL undef %24:sreg_64_xexec, 0, CustomRegMask($sgpr60,$sgpr62)
+ dead $sgpr30_sgpr31 = SI_CALL undef %24:sreg_64_xexec_xnull, 0, CustomRegMask($sgpr60,$sgpr62)
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
bb.4:
@@ -338,7 +338,7 @@ body: |
bb.9:
%31:vreg_64_align2 = COPY %19.sub16_sub17, implicit $exec
- GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, %31, undef %24:sreg_64_xexec, 0, 0, implicit $exec :: (store (s64), addrspace 1)
+ GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, %31, undef %24:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s64), addrspace 1)
%32:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %5, implicit $exec
dead %33:sreg_64_xexec = V_CMP_NE_U32_e64 1, %32, implicit $exec
undef %34.sub0:sreg_64 = S_ADD_U32 %15.sub0, 32, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
index 9c1b4af9c7fbab..3fdb0c7c0885b4 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
@@ -12,7 +12,7 @@ body: |
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -26,7 +26,7 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32))
; CHECK-NEXT: S_ENDPGM 0
bb.0:
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
index 6fa1e2b663fa9b..bdfc8227fdccb1 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
@@ -28,7 +28,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: no_sched_barrier
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -58,7 +58,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_0
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -69,7 +69,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -91,7 +91,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_1
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -102,7 +102,7 @@ body: |
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -123,7 +123,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_2
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -136,7 +136,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
@@ -159,7 +159,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_4
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -178,7 +178,7 @@ body: |
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -207,7 +207,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_8
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -226,7 +226,7 @@ body: |
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -255,7 +255,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_16
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -266,7 +266,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -287,7 +287,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_32
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -298,7 +298,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -319,7 +319,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_64
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -330,7 +330,7 @@ body: |
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
@@ -449,7 +449,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_masks_8_12
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -470,7 +470,7 @@ body: |
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -501,7 +501,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_4_bundle
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -515,7 +515,7 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -539,7 +539,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_barrier_mask_0_bundle
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -553,7 +553,7 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
index d6d89a63c22c25..d6774bb39dca72 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
@@ -18,7 +18,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_2_VMEM_10_ALU_5_MFMA_2_VMEM_WRITE
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -42,7 +42,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_2_VMEM_10_ALU_5_MFMA_2_VMEM_WRITE
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -64,7 +64,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 5, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -98,7 +98,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -130,7 +130,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -160,7 +160,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 6, 1, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -210,7 +210,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_2_separate_pipes
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -236,7 +236,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MUL_LO_U32_e64_3]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_2_separate_pipes
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -260,7 +260,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 2
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 2, 2
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MUL_LO_U32_e64_3]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -298,7 +298,7 @@ tracksRegLiveness: true
body: |
bb.0:
; GREEDY-LABEL: name: sched_group_barrier_3_separate_pipes
- ; GREEDY: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GREEDY-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GREEDY-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; GREEDY-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -328,7 +328,7 @@ body: |
; GREEDY-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_3_separate_pipes
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -356,7 +356,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 1, 1
; EXACT-NEXT: SCHED_GROUP_BARRIER 16, 1, 1
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_3]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir
index 372e0fee3c8eae..4f844762b24e30 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-group-barrier-pre-RA.mir
@@ -18,7 +18,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: no_sched_group_barrier
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: no_sched_group_barrier
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -56,7 +56,7 @@ body: |
; EXACT-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
; EXACT-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -82,7 +82,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -108,7 +108,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
@@ -132,7 +132,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 2, 3, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -170,7 +170,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_group_barrier_2_VMEM_1000_ALU_5_MFMA_2_VMEM_WRITE
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -194,7 +194,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_2_VMEM_1000_ALU_5_MFMA_2_VMEM_WRITE
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -216,7 +216,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 8, 5, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
@@ -250,7 +250,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -282,7 +282,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
;
; EXACT-LABEL: name: sched_group_barrier_MFMA_VALU_and_SALU_alternating
- ; EXACT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; EXACT: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; EXACT-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXACT-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; EXACT-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
@@ -312,7 +312,7 @@ body: |
; EXACT-NEXT: SCHED_GROUP_BARRIER 6, 1, 0
; EXACT-NEXT: SCHED_GROUP_BARRIER 64, 2, 0
; EXACT-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
- %0:sreg_64 = IMPLICIT_DEF
+ %0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:areg_128 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index 2a280bcda42f52..00baea8ed7a275 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -570,7 +570,7 @@ define protected amdgpu_kernel void @nested_waterfalls(ptr addrspace(1) %tex.coe
; SI-NEXT: bb.1.if.then:
; SI-NEXT: successors: %bb.2(0x80000000)
; SI-NEXT: {{ $}}
- ; SI-NEXT: early-clobber %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM_ec killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset, align 4, addrspace 4)
+ ; SI-NEXT: early-clobber %10:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM_ec killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset, align 4, addrspace 4)
; SI-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 3, killed [[COPY1]](s32), implicit $exec
; SI-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR killed %10, killed [[V_LSHLREV_B32_e64_]], 0, 0, implicit $exec :: (load (s64) from %ir.idx, addrspace 1)
; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_SADDR]], 16, 0, implicit $exec :: (invariant load (s128) from %ir.3 + 16, addrspace 4)
diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s
index b35ad87edeea35..2ce613b324e740 100644
--- a/llvm/test/MC/AMDGPU/flat-global.s
+++ b/llvm/test/MC/AMDGPU/flat-global.s
@@ -210,7 +210,6 @@ global_store_dword v3, v1, s[2:3] offset:-8
// GFX9: global_store_dword v3, v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// XXX: Is this valid?
global_store_dword v3, v1, exec
// GFX10-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX9-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt
index f8d31294ee9cc4..7e5366b80fbacc 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_flat.txt
@@ -761,9 +761,6 @@
# GFX10: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00]
0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00
-# GFX10: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
-0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00
-
# GFX10: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00
>From dbdfecfb9a3636405d6cd6bce140a91e569c601d Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Fri, 23 Aug 2024 10:27:20 -0700
Subject: [PATCH 3/3] Use declared reg class for SADDR instead of hard-code.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 ++++++--
llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir | 4 ++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 185bc1306948fb..801547f17fe78a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6215,8 +6215,12 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
return;
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
- if (MRI.getRegClass(ToSGPR) == &AMDGPU::SReg_64RegClass)
- MRI.setRegClass(ToSGPR, &AMDGPU::SReg_64_XEXEC_XNULLRegClass);
+
+ const TargetRegisterClass *DeclaredRC =
+ getRegClass(MI.getDesc(), SAddr->getOperandNo(),
+ MRI.getTargetRegisterInfo(), *MI.getParent()->getParent());
+
+ MRI.setRegClass(ToSGPR, DeclaredRC);
SAddr->setReg(ToSGPR);
}
diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
index f982833569c236..c1c5afcd3e96db 100644
--- a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir
@@ -203,7 +203,7 @@ body: |
; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
@@ -250,7 +250,7 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; GCN-NEXT: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
More information about the llvm-commits
mailing list