[llvm] [AMDGPU] Remove leftover implicit operands from SI_SPILL/SI_RESTORE. (PR #168546)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 06:59:30 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (LU-JOHN)
<details>
<summary>Changes</summary>
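Remove leftover implicit operands from SI_SPILL_S32_TO_VGPR/SI_RESTORE_S32_FROM_VGPR when `SIInstrInfo::expandPostRAPseudo` expands them to V_WRITELANE_B32/V_READLANE_B32, by calling `mutateAndCleanupImplicit` instead of `MI.setDesc`.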
---
Patch is 163.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168546.diff
14 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll (+636-636)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll (+31-31)
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll (+19-19)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu-flat.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved.ll (-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7cb7f47ddb220..630fdc8e8891c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2094,11 +2094,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;
case AMDGPU::SI_SPILL_S32_TO_VGPR:
- MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
+ mutateAndCleanupImplicit(MI, get(AMDGPU::V_WRITELANE_B32));
break;
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
- MI.setDesc(get(AMDGPU::V_READLANE_B32));
+ mutateAndCleanupImplicit(MI, get(AMDGPU::V_READLANE_B32));
break;
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index 8879ef5c8265d..d965a3dbcc8a4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -8181,8 +8181,8 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: v_mov_b32_e32 v2, s18
; SI-NEXT: v_readlane_b32 s18, v23, 0
; SI-NEXT: s_and_b32 s16, s16, 0xff
-; SI-NEXT: v_readlane_b32 s19, v23, 1
; SI-NEXT: s_lshl_b32 s18, s18, 8
+; SI-NEXT: v_readlane_b32 s19, v23, 1
; SI-NEXT: s_or_b32 s16, s16, s18
; SI-NEXT: v_readlane_b32 s18, v23, 2
; SI-NEXT: v_readlane_b32 s19, v23, 3
@@ -8215,8 +8215,8 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: v_mov_b32_e32 v2, s16
; SI-NEXT: v_readlane_b32 s16, v23, 6
; SI-NEXT: s_and_b32 s14, s14, 0xff
-; SI-NEXT: v_readlane_b32 s17, v23, 7
; SI-NEXT: s_lshl_b32 s16, s16, 8
+; SI-NEXT: v_readlane_b32 s17, v23, 7
; SI-NEXT: s_or_b32 s14, s14, s16
; SI-NEXT: v_readlane_b32 s16, v23, 8
; SI-NEXT: v_readlane_b32 s17, v23, 9
@@ -8249,8 +8249,8 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: v_mov_b32_e32 v2, s14
; SI-NEXT: v_readlane_b32 s14, v23, 12
; SI-NEXT: s_and_b32 s12, s12, 0xff
-; SI-NEXT: v_readlane_b32 s15, v23, 13
; SI-NEXT: s_lshl_b32 s14, s14, 8
+; SI-NEXT: v_readlane_b32 s15, v23, 13
; SI-NEXT: s_or_b32 s12, s12, s14
; SI-NEXT: v_readlane_b32 s14, v23, 14
; SI-NEXT: v_readlane_b32 s15, v23, 15
@@ -8283,8 +8283,8 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: v_mov_b32_e32 v2, s12
; SI-NEXT: v_readlane_b32 s12, v23, 18
; SI-NEXT: s_and_b32 s10, s10, 0xff
-; SI-NEXT: v_readlane_b32 s13, v23, 19
; SI-NEXT: s_lshl_b32 s12, s12, 8
+; SI-NEXT: v_readlane_b32 s13, v23, 19
; SI-NEXT: s_or_b32 s10, s10, s12
; SI-NEXT: v_readlane_b32 s12, v23, 20
; SI-NEXT: v_readlane_b32 s13, v23, 21
@@ -8317,8 +8317,8 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_readlane_b32 s10, v23, 24
; SI-NEXT: s_and_b32 s8, s8, 0xff
-; SI-NEXT: v_readlane_b32 s11, v23, 25
; SI-NEXT: s_lshl_b32 s10, s10, 8
+; SI-NEXT: v_readlane_b32 s11, v23, 25
; SI-NEXT: s_or_b32 s8, s8, s10
; SI-NEXT: v_readlane_b32 s10, v23, 26
; SI-NEXT: v_readlane_b32 s11, v23, 27
@@ -8350,8 +8350,8 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: v_mov_b32_e32 v2, s8
; SI-NEXT: v_readlane_b32 s8, v23, 30
; SI-NEXT: s_and_b32 s6, s6, 0xff
-; SI-NEXT: v_readlane_b32 s9, v23, 31
; SI-NEXT: s_lshl_b32 s8, s8, 8
+; SI-NEXT: v_readlane_b32 s9, v23, 31
; SI-NEXT: s_or_b32 s6, s6, s8
; SI-NEXT: v_readlane_b32 s8, v23, 32
; SI-NEXT: v_readlane_b32 s9, v23, 33
@@ -8384,8 +8384,8 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: v_readlane_b32 s6, v23, 36
; SI-NEXT: s_and_b32 s4, s4, 0xff
-; SI-NEXT: v_readlane_b32 s7, v23, 37
; SI-NEXT: s_lshl_b32 s6, s6, 8
+; SI-NEXT: v_readlane_b32 s7, v23, 37
; SI-NEXT: s_or_b32 s4, s4, s6
; SI-NEXT: v_readlane_b32 s6, v23, 38
; SI-NEXT: v_readlane_b32 s7, v23, 39
@@ -8468,148 +8468,149 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB13_4:
-; SI-NEXT: ; implicit-def: $sgpr51
; SI-NEXT: ; implicit-def: $sgpr50
-; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_writelane_b32 v23, s50, 0
-; SI-NEXT: v_writelane_b32 v23, s51, 1
-; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: ; implicit-def: $sgpr51
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 1
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 2
-; SI-NEXT: v_writelane_b32 v23, s51, 3
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 2
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 3
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 4
-; SI-NEXT: v_writelane_b32 v23, s51, 5
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 4
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 5
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 6
-; SI-NEXT: v_writelane_b32 v23, s51, 7
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 6
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 7
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 8
-; SI-NEXT: v_writelane_b32 v23, s51, 9
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 8
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 9
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 10
-; SI-NEXT: v_writelane_b32 v23, s51, 11
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 10
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 11
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 12
-; SI-NEXT: v_writelane_b32 v23, s51, 13
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 12
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 13
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 14
-; SI-NEXT: v_writelane_b32 v23, s51, 15
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 14
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 15
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 16
-; SI-NEXT: v_writelane_b32 v23, s51, 17
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 16
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 17
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 18
-; SI-NEXT: v_writelane_b32 v23, s51, 19
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 18
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 19
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 20
-; SI-NEXT: v_writelane_b32 v23, s51, 21
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 20
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 21
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 22
-; SI-NEXT: v_writelane_b32 v23, s51, 23
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 22
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 23
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 24
-; SI-NEXT: v_writelane_b32 v23, s51, 25
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 24
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 25
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 26
-; SI-NEXT: v_writelane_b32 v23, s51, 27
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 26
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 27
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 28
-; SI-NEXT: v_writelane_b32 v23, s51, 29
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 28
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 29
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 30
-; SI-NEXT: v_writelane_b32 v23, s51, 31
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 30
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 31
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 32
-; SI-NEXT: v_writelane_b32 v23, s51, 33
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 32
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 33
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 34
-; SI-NEXT: v_writelane_b32 v23, s51, 35
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 34
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 35
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 36
-; SI-NEXT: v_writelane_b32 v23, s51, 37
; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 36
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: v_writelane_b32 v23, s51, 37
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: ; implicit-def: $sgpr50
; SI-NEXT: v_writelane_b32 v23, s50, 38
+; SI-NEXT: ; kill: killed $sgpr26
+; SI-NEXT: ; implicit-def: $sgpr26
; SI-NEXT: v_writelane_b32 v23, s51, 39
-; SI-NEXT: ; implicit-def: $sgpr50
; SI-NEXT: ; kill: killed $sgpr26
; SI-NEXT: ; implicit-def: $sgpr26
+; SI-NEXT: ; implicit-def: $sgpr50
+; SI-NEXT: v_writelane_b32 v23, s50, 40
; SI-NEXT: ; implicit-def: $sgpr49
; SI-NEXT: ; implicit-def: $sgpr55
; SI-NEXT: ; implicit-def: $sgpr54
@@ -8634,7 +8635,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; SI-NEXT: ; implicit-def: $sgpr58
; SI-NEXT: ; implicit-def: $sgpr28
; SI-NEXT: ; implicit-def: $sgpr26
-; SI-NEXT: v_writelane_b32 v23, s50, 40
; SI-NEXT: ; implicit-def: $sgpr98
; SI-NEXT: ; implicit-def: $sgpr96
; SI-NEXT: ; implicit-def: $sgpr86
@@ -10597,10 +10597,9 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX9-NEXT: .LBB13_4:
; GFX9-NEXT: ; implicit-def: $sgpr27
; GFX9-NEXT: ; kill: killed $sgpr27
-; GFX9-NEXT: ; implicit-def: $sgpr83
-; GFX9-NEXT: ; implicit-def: $sgpr82
; GFX9-NEXT: ; implicit-def: $sgpr27
; GFX9-NEXT: ; kill: killed $sgpr27
+; GFX9-NEXT: ; implicit-def: $sgpr82
; GFX9-NEXT: v_writelane_b32 v22, s82, 0
; GFX9-NEXT: ; implicit-def: $sgpr27
; GFX9-NEXT: ; kill: killed $sgpr27
@@ -10633,6 +10632,7 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX9-NEXT: ; implicit-def: $sgpr85
; GFX9-NEXT: ; implicit-def: $sgpr84
; GFX9-NEXT: ; implicit-def: $sgpr81
+; GFX9-NEXT: ; implicit-def: $sgpr83
; GFX9-NEXT: ; implicit-def: $sgpr36
; GFX9-NEXT: ; implicit-def: $sgpr34
; GFX9-NEXT: ; implicit-def: $sgpr30
@@ -10985,12 +10985,10 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX11-NEXT: s_lshr_b64 s[62:63], s[20:21], 24
; GFX11-NEXT: s_branch .LBB13_3
; GFX11-NEXT: .LBB13_2:
-; GFX11-NEXT: ; implicit-def: $vcc_hi
; GFX11-NEXT: ; implicit-def: $vcc_lo
-; GFX11-NEXT: ; implicit-def: $sgpr42
-; GFX11-NEXT: ; kill: killed $sgpr42
-; GFX11-NEXT: s_mov_b32 s101, -1
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 0
+; GFX11-NEXT: ; implicit-def: $vcc_hi
+; GFX11-NEXT: ; implicit-def: $vcc_lo
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
@@ -11001,7 +10999,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 1
-; GFX11-NEXT: ; implicit-def: $vcc_lo
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: ; kill: killed $sgpr42
@@ -11013,6 +11010,7 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 2
+; GFX11-NEXT: ; implicit-def: $vcc_lo
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: ; kill: killed $sgpr42
@@ -11024,7 +11022,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 3
-; GFX11-NEXT: ; implicit-def: $vcc_lo
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: ; kill: killed $sgpr42
@@ -11036,6 +11033,7 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 4
+; GFX11-NEXT: ; implicit-def: $vcc_lo
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: ; kill: killed $sgpr42
@@ -11044,10 +11042,12 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
+; GFX11-NEXT: s_mov_b32 s101, -1
+; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 5
+; GFX11-NEXT: ; kill: killed $sgpr42
+; GFX11-NEXT: ; implicit-def: $sgpr42
; GFX11-NEXT: ; kill: killed $sgpr42
; GFX11-NEXT: ; implicit-def: $sgpr42
-; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 5
-; GFX11-NEXT: ; implicit-def: $vcc_lo
; GFX11-NEXT: ; implicit-def: $sgpr45
; GFX11-NEXT: ; implicit-def: $sgpr44
; GFX11-NEXT: ; implicit-def: $sgpr30
@@ -11111,17 +11111,17 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3
; GFX11-NEXT: ; implicit-def: $sgpr88
; GFX11-NEXT: ; implicit-def: $sgpr76
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 6
-; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 7
; GFX11-NEXT: ; implicit-def: $vcc_lo
+; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 7
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 8
-; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 9
; GFX11-NEXT: ; implicit-def: $vcc_lo
+; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 9
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 10
-; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 11
; GFX11-NEXT: ; implicit-def: $vcc_lo
+; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 11
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 12
-; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 13
; GFX11-NEXT: ; implicit-def: $vcc_lo
+; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 13
; GFX11-NEXT: v_writelane_b32 v37, vcc_lo, 14
; GFX11-NEXT: v_writelane_b32 v37, vcc_hi, 15
; GFX11-NEXT: .LBB13_3: ; %Flow
@@ -45248,147 +45248,149 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a,
; SI-NEXT: v_lshrrev_b32_e32 v10, 8, v37
; SI-NEXT: s_branch .LBB37_5
; SI-NEXT: .LBB37_3:
-; SI-NEXT: ; implicit-def: $sgpr61
; SI-NEXT: ; implicit-def: $sgpr60
-; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: v_writelane_b32 v61, s60, 0
+; SI-NEXT: ; implicit-def: $sgpr61
; SI-NEXT: v_writelane_b32 v61, s61, 1
-; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; implicit-def: $sgpr4
+; SI-NEXT: ; implicit-def: $sgpr60
+; SI-NEXT: v_writelane_b32 v61, s60, 2
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: v_writelane_b32 v61, s60, 2
; SI-NEXT: v_writelane_b32 v61, s61, 3
-; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
+; SI-NEXT: ; implicit-def: $sgpr60
+; SI-NEXT: v_writelane_b32 v61, s60, 4
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: v_writelane_b32 v61, s60, 4
; SI-NEXT: v_writelane_b32 v61, s61, 5
-; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
+; SI-NEXT: ; implicit-def: $sgpr60
+; SI-NEXT: v_writelane_b32 v61, s60, 6
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: v_writelane_b32 v61, s60, 6
; SI-NEXT: v_writelane_b32 v61, s61, 7
-; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
+; SI-NEXT: ; implicit-def: $sgpr60
+; SI-NEXT: v_writelane_b32 v61, s60, 8
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: v_writelane_b32 v61, s60, 8
; SI-NEXT: v_writelane_b32 v61, s61, 9
-; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
+; SI-NEXT: ; implicit-def: $sgpr60
+; SI-NEXT: v_writelane_b32 v61, s60, 10
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: v_writelane_b32 v61, s60, 10
; SI-NEXT: v_writelane_b32 v61, s61, 11
-; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
+; SI-NEXT: ; implicit-def: $sgpr60
+; SI-NEXT: v_writelane_b32 v61, s60, 12
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: v_writelane_b32 v61, s60, 12
; SI-NEXT: v_writelane_b32 v61, s61, 13
-; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
+; SI-NEXT: ; implicit-def: $sgpr60
+; SI-NEXT: v_writelane_b32 v61, s60, 14
; SI-NEXT: ; kill: killed $sgpr4
; SI-NEXT: ; implicit-def: $sgpr4
-; SI-NEXT: v_writelane_b32 v61, s60, 14
; SI-NEXT: v_writelane_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/168546
More information about the llvm-commits mailing list