[llvm] [SDag] Notify listeners when deleting a node (PR #66991)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 21 01:56:54 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-selectiondag

<details>
<summary>Changes</summary>

SelectionDAG::DeleteNode does not notify listeners about node deletion. As a result, SelectionDAG::Legalize may skip legalization of some nodes resulting in "Legalized selection DAG" containing illegal nodes. These nodes will be legalized during subsequent DAG combining phase, but this may be too late for some patterns to match.

---

Patch is 122.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66991.diff


11 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/half.ll (+36-36) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+44-44) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+67-66) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i8.ll (+61-61) 
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i16.ll (+90-91) 
- (modified) llvm/test/CodeGen/X86/pr40730.ll (+11-5) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-5.ll (+14-14) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll (+249-255) 
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll (+28-28) 
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll (+6-6) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7fcd1f4f898911a..fd244f33e56c39b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1036,6 +1036,9 @@ void SelectionDAG::RemoveDeadNode(SDNode *N){
 }
 
 void SelectionDAG::DeleteNode(SDNode *N) {
+  for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+    DUL->NodeDeleted(N, nullptr);
+
   // First take this out of the appropriate CSE map.
   RemoveNodeFromCSEMaps(N);
 
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index e2d55990473c09c..e55c7bd3ac5e628 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -1969,19 +1969,19 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
 ; VI-NEXT:    s_addc_u32 s3, s1, 0
 ; VI-NEXT:    v_mov_b32_e32 v15, s3
 ; VI-NEXT:    v_mov_b32_e32 v14, s2
-; VI-NEXT:    s_add_u32 s2, s0, 0x50
+; VI-NEXT:    s_add_u32 s2, s0, 0x70
 ; VI-NEXT:    s_addc_u32 s3, s1, 0
 ; VI-NEXT:    v_mov_b32_e32 v17, s3
 ; VI-NEXT:    v_mov_b32_e32 v16, s2
-; VI-NEXT:    s_add_u32 s2, s0, 64
+; VI-NEXT:    s_add_u32 s2, s0, 0x60
 ; VI-NEXT:    s_addc_u32 s3, s1, 0
 ; VI-NEXT:    v_mov_b32_e32 v19, s3
 ; VI-NEXT:    v_mov_b32_e32 v11, s1
 ; VI-NEXT:    v_mov_b32_e32 v18, s2
-; VI-NEXT:    s_add_u32 s2, s0, 0x70
+; VI-NEXT:    s_add_u32 s2, s0, 0x50
 ; VI-NEXT:    v_mov_b32_e32 v10, s0
 ; VI-NEXT:    s_addc_u32 s3, s1, 0
-; VI-NEXT:    s_add_u32 s0, s0, 0x60
+; VI-NEXT:    s_add_u32 s0, s0, 64
 ; VI-NEXT:    s_addc_u32 s1, s1, 0
 ; VI-NEXT:    s_waitcnt vmcnt(1)
 ; VI-NEXT:    v_cvt_f32_f16_e32 v22, v4
@@ -1995,15 +1995,15 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
 ; VI-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
 ; VI-NEXT:    v_cvt_f64_f32_e32 v[6:7], v7
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v26, v2
-; VI-NEXT:    v_cvt_f32_f16_sdwa v27, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT:    v_cvt_f32_f16_sdwa v28, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-NEXT:    v_cvt_f32_f16_e32 v26, v0
+; VI-NEXT:    v_cvt_f32_f16_sdwa v27, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-NEXT:    v_cvt_f32_f16_sdwa v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
-; VI-NEXT:    v_cvt_f32_f16_e32 v8, v3
-; VI-NEXT:    v_cvt_f32_f16_e32 v29, v0
-; VI-NEXT:    v_cvt_f32_f16_sdwa v30, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT:    v_cvt_f32_f16_e32 v31, v1
-; VI-NEXT:    v_cvt_f32_f16_sdwa v32, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-NEXT:    v_cvt_f32_f16_e32 v8, v1
+; VI-NEXT:    v_cvt_f32_f16_e32 v29, v2
+; VI-NEXT:    v_cvt_f32_f16_sdwa v30, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-NEXT:    v_cvt_f32_f16_e32 v31, v3
+; VI-NEXT:    v_cvt_f32_f16_sdwa v32, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v20
 ; VI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v21
 ; VI-NEXT:    v_cvt_f64_f32_e32 v[4:5], v22
@@ -2040,40 +2040,40 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
 ; GFX11-NEXT:    global_load_b128 v[0:3], v32, s[2:3]
 ; GFX11-NEXT:    global_load_b128 v[4:7], v32, s[2:3] offset:16
 ; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v10, v1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v5
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v4
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v15, v7
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v23, v7
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v14, v6
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v22, v6
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v10, v1
+; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 16, v4
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v13, v3
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v12, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v18, v4
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v22, v5
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[4:5], v10
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v10, v23
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v34, v11
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v11, v19
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v7, v7
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v18, v5
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v34, v11
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v11, v19
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v14, v4
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[4:5], v10
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v10, v15
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v8, v0
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[28:29], v22
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[30:31], v10
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[24:25], v18
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[26:27], v11
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[28:29], v23
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[30:31], v7
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[24:25], v22
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[26:27], v6
 ; GFX11-NEXT:    v_cvt_f32_f16_e32 v33, v9
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[20:21], v15
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[22:23], v7
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[20:21], v18
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[22:23], v11
 ; GFX11-NEXT:    v_cvt_f64_f32_e32 v[16:17], v14
-; GFX11-NEXT:    v_cvt_f64_f32_e32 v[18:19], v6
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[18:19], v10
 ; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v8
 ; GFX11-NEXT:    v_cvt_f64_f32_e32 v[8:9], v12
 ; GFX11-NEXT:    v_cvt_f64_f32_e32 v[12:13], v13
@@ -2082,10 +2082,10 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
 ; GFX11-NEXT:    v_cvt_f64_f32_e32 v[6:7], v34
 ; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v33
 ; GFX11-NEXT:    s_clause 0x7
-; GFX11-NEXT:    global_store_b128 v32, v[28:31], s[0:1] offset:80
-; GFX11-NEXT:    global_store_b128 v32, v[24:27], s[0:1] offset:64
-; GFX11-NEXT:    global_store_b128 v32, v[20:23], s[0:1] offset:112
-; GFX11-NEXT:    global_store_b128 v32, v[16:19], s[0:1] offset:96
+; GFX11-NEXT:    global_store_b128 v32, v[28:31], s[0:1] offset:112
+; GFX11-NEXT:    global_store_b128 v32, v[24:27], s[0:1] offset:96
+; GFX11-NEXT:    global_store_b128 v32, v[20:23], s[0:1] offset:80
+; GFX11-NEXT:    global_store_b128 v32, v[16:19], s[0:1] offset:64
 ; GFX11-NEXT:    global_store_b128 v32, v[12:15], s[0:1] offset:48
 ; GFX11-NEXT:    global_store_b128 v32, v[8:11], s[0:1] offset:32
 ; GFX11-NEXT:    global_store_b128 v32, v[4:7], s[0:1] offset:16
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index 438b1bfe319a04e..f433bd86023645b 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -4526,33 +4526,33 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_mov_b32 s0, s4
 ; GFX6-NEXT:    s_mov_b32 s1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v2, v29, 11, 1
-; GFX6-NEXT:    v_bfe_u32 v0, v29, 10, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
-; GFX6-NEXT:    v_bfe_u32 v5, v29, 9, 1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 15, v29
+; GFX6-NEXT:    v_bfe_u32 v0, v29, 14, 1
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    v_bfe_u32 v5, v29, 13, 1
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v3, v29, 8, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
-; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 15, v29
+; GFX6-NEXT:    v_bfe_u32 v3, v29, 12, 1
+; GFX6-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    v_bfe_u32 v8, v29, 11, 1
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v6, v29, 14, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
-; GFX6-NEXT:    v_bfe_u32 v27, v29, 5, 1
-; GFX6-NEXT:    v_bfe_u32 v23, v29, 7, 1
-; GFX6-NEXT:    v_bfe_u32 v19, v29, 1, 1
-; GFX6-NEXT:    v_bfe_u32 v15, v29, 3, 1
-; GFX6-NEXT:    v_bfe_u32 v11, v29, 13, 1
-; GFX6-NEXT:    v_bfe_u32 v25, v29, 4, 1
-; GFX6-NEXT:    v_bfe_u32 v21, v29, 6, 1
-; GFX6-NEXT:    v_and_b32_e32 v17, 1, v29
-; GFX6-NEXT:    v_bfe_u32 v13, v29, 2, 1
+; GFX6-NEXT:    v_bfe_u32 v6, v29, 10, 1
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    v_bfe_u32 v27, v29, 1, 1
+; GFX6-NEXT:    v_bfe_u32 v23, v29, 3, 1
+; GFX6-NEXT:    v_bfe_u32 v19, v29, 5, 1
+; GFX6-NEXT:    v_bfe_u32 v15, v29, 7, 1
+; GFX6-NEXT:    v_bfe_u32 v11, v29, 9, 1
+; GFX6-NEXT:    v_and_b32_e32 v25, 1, v29
+; GFX6-NEXT:    v_bfe_u32 v21, v29, 2, 1
+; GFX6-NEXT:    v_bfe_u32 v17, v29, 4, 1
+; GFX6-NEXT:    v_bfe_u32 v13, v29, 6, 1
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v9, v29, 12, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
-; GFX6-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
-; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0
-; GFX6-NEXT:    buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
-; GFX6-NEXT:    buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
+; GFX6-NEXT:    v_bfe_u32 v9, v29, 8, 1
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:48
+; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:32
+; GFX6-NEXT:    buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    buffer_store_dwordx4 v[25:28], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
@@ -4561,21 +4561,20 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
 ; GFX8-NEXT:    v_mov_b32_e32 v17, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v21, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v19, v17
-; GFX8-NEXT:    v_mov_b32_e32 v13, v17
+; GFX8-NEXT:    v_mov_b32_e32 v4, 1
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    flat_load_ushort v2, v[0:1]
 ; GFX8-NEXT:    s_add_u32 s2, s0, 0x70
 ; GFX8-NEXT:    s_addc_u32 s3, s1, 0
-; GFX8-NEXT:    s_add_u32 s4, s0, 0x50
+; GFX8-NEXT:    s_add_u32 s4, s0, 0x60
 ; GFX8-NEXT:    s_addc_u32 s5, s1, 0
-; GFX8-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v24, s3
-; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v23, s2
-; GFX8-NEXT:    s_add_u32 s2, s0, 64
+; GFX8-NEXT:    s_add_u32 s2, s0, 0x50
 ; GFX8-NEXT:    s_addc_u32 s3, s1, 0
+; GFX8-NEXT:    v_mov_b32_e32 v13, v17
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v17
 ; GFX8-NEXT:    v_mov_b32_e32 v5, v17
 ; GFX8-NEXT:    v_mov_b32_e32 v22, 0
@@ -4584,24 +4583,26 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
 ; GFX8-NEXT:    v_mov_b32_e32 v7, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v11, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 10, v2
-; GFX8-NEXT:    v_and_b32_e32 v18, 1, v4
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 11, v2
-; GFX8-NEXT:    v_and_b32_e32 v4, 1, v4
-; GFX8-NEXT:    v_and_b32_e32 v20, 0xffff, v4
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 14, v2
+; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 13, v2
+; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 12, v2
+; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-NEXT:    v_and_b32_e32 v18, 1, v1
+; GFX8-NEXT:    v_and_b32_e32 v20, 0xffff, v0
+; GFX8-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[18:21]
-; GFX8-NEXT:    v_mov_b32_e32 v0, 1
-; GFX8-NEXT:    v_and_b32_e32 v16, 1, v4
+; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 14, v2
+; GFX8-NEXT:    v_and_b32_e32 v16, 1, v0
 ; GFX8-NEXT:    v_lshrrev_b16_e32 v18, 15, v2
 ; GFX8-NEXT:    flat_store_dwordx4 v[23:24], v[16:19]
+; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 11, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v24, s3
-; GFX8-NEXT:    v_and_b32_sdwa v16, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 9, v2
-; GFX8-NEXT:    v_mov_b32_e32 v23, s2
+; GFX8-NEXT:    v_lshrrev_b16_e32 v6, 10, v2
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT:    s_add_u32 s2, s0, 0x60
+; GFX8-NEXT:    v_mov_b32_e32 v23, s2
+; GFX8-NEXT:    s_add_u32 s2, s0, 64
 ; GFX8-NEXT:    v_mov_b32_e32 v19, 0
+; GFX8-NEXT:    v_and_b32_e32 v16, 1, v6
 ; GFX8-NEXT:    v_and_b32_e32 v18, 0xffff, v0
 ; GFX8-NEXT:    s_addc_u32 s3, s1, 0
 ; GFX8-NEXT:    flat_store_dwordx4 v[23:24], v[16:19]
@@ -4610,17 +4611,16 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
 ; GFX8-NEXT:    s_add_u32 s2, s0, 48
 ; GFX8-NEXT:    s_addc_u32 s3, s1, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v26, s3
-; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 12, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v25, s2
 ; GFX8-NEXT:    s_add_u32 s2, s0, 32
-; GFX8-NEXT:    v_and_b32_e32 v19, 1, v0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 13, v2
+; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 9, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v20, v17
 ; GFX8-NEXT:    v_mov_b32_e32 v1, v17
 ; GFX8-NEXT:    v_mov_b32_e32 v17, s1
 ; GFX8-NEXT:    s_addc_u32 s3, s1, 0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v16, s0
+; GFX8-NEXT:    v_and_b32_sdwa v19, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_and_b32_e32 v21, 0xffff, v0
 ; GFX8-NEXT:    s_add_u32 s0, s0, 16
 ; GFX8-NEXT:    v_lshrrev_b16_e32 v6, 7, v2
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index bee3d455187ca78..ace5eafc9aa8478 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -5650,27 +5650,27 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s1, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s3, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s7, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s5, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s4, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s6, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s5, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s7, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s6, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s4, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s2, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s0, 16
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s2, s2, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s1, s1, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s3, s3, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s14
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s3
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
@@ -5680,13 +5680,13 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s16
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
@@ -5707,33 +5707,33 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %
 ; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-HSA-NEXT:    s_lshr_b32 s12, s5, 16
 ; GCN-HSA-NEXT:    s_lshr_b32 s13, s7, 16
-; GCN-HSA-NEXT:    s_lshr_b32 s14, s11, 16
-; GCN-HSA-NEXT:    s_lshr_b32 s2, s9, 16
-; GCN-HSA-NEXT:    s_lshr_b32 s15, s8, 16
-; GCN-HSA-NEXT:    s_lshr_b32 s16, s10, 16
+; GCN-HSA-NEXT:    s_lshr_b32 s14, s9, 16
+; GCN-HSA-NEXT:    s_lshr_b32 s2, s11, 16
+; GCN-HSA-NEXT:    s_lshr_b32 s15, s10, 16
+; GCN-HSA-NEXT:    s_lshr_b32 s16, s8, 16
 ; GCN-HSA-NEXT:    s_lshr_b32 s17, s6, 16
 ; GCN-HSA-NEXT:    s_lshr_b32 s18, s4, 16
 ; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
 ; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
-; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
 ; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
+; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
 ; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
-; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
-; GCN-HSA-NEXT:    s_and_b32 s3, s9, 0xffff
+; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
+; GCN-HSA-NEXT:    s_and_b32 s3, s11, 0xffff
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
-; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
+; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
 ; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
+; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
 ; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
 ; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
-; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
+; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s9
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
 ; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
 ; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
@@ -5746,22 +5746,22 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %
 ; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
+; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
 ; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
 ; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
-; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
+; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
+; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
 ; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
 ; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
 ; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
-; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
+; GCN-HSA-NEXT:    v_mov_b32_e32 v...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/66991


More information about the llvm-commits mailing list