[llvm] 84e9401 - [AMDGPU] Fix lit failure (#95620)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 16:45:14 PDT 2024
Author: Jeffrey Byrnes
Date: 2024-06-14T16:45:11-07:00
New Revision: 84e9401a7f848ebf3268a0ad1d7e36b8223afbc4
URL: https://github.com/llvm/llvm-project/commit/84e9401a7f848ebf3268a0ad1d7e36b8223afbc4
DIFF: https://github.com/llvm/llvm-project/commit/84e9401a7f848ebf3268a0ad1d7e36b8223afbc4.diff
LOG: [AMDGPU] Fix lit failure (#95620)
The lit test in
https://github.com/llvm/llvm-project/commit/7091dd277a1e2349f33390be9f3ccf21bff39003
was not updated for
https://github.com/llvm/llvm-project/commit/e7e90dd1c1014b4a7ef77f74af3682168d23ddbf.
This commit updates the GFX8 check lines in llvm/test/CodeGen/AMDGPU/abs_i16.ll to match.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/abs_i16.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/abs_i16.ll b/llvm/test/CodeGen/AMDGPU/abs_i16.ll
index c7d0df6d0bf46..daed0986fa9c8 100644
--- a/llvm/test/CodeGen/AMDGPU/abs_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/abs_i16.ll
@@ -98,10 +98,10 @@ define <2 x i16> @v_abs_v2i16(<2 x i16> %arg) {
; GFX8-LABEL: v_abs_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT: v_sub_u16_e32 v2, 0, v1
-; GFX8-NEXT: v_max_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v1, 0
+; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0
+; GFX8-NEXT: v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -181,12 +181,12 @@ define <3 x i16> @v_abs_v3i16(<3 x i16> %arg) {
; GFX8-LABEL: v_abs_v3i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX8-NEXT: v_sub_u16_e32 v3, 0, v2
-; GFX8-NEXT: v_max_i16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: v_sub_u16_e32 v3, 0, v1
+; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v3, 0, v0
+; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -286,18 +286,17 @@ define <4 x i16> @v_abs_v4i16(<4 x i16> %arg) {
; GFX8-LABEL: v_abs_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX8-NEXT: v_sub_u16_e32 v3, 0, v2
-; GFX8-NEXT: v_max_i16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: v_sub_u16_e32 v4, 0, v3
-; GFX8-NEXT: v_max_i16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-NEXT: v_sub_u16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v4, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v5, 0, v0
+; GFX8-NEXT: v_max_i16_sdwa v3, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v5
; GFX8-NEXT: v_max_i16_e32 v1, v1, v4
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v4i16:
@@ -411,24 +410,22 @@ define <6 x i16> @v_abs_v6i16(<6 x i16> %arg) {
; GFX8-LABEL: v_abs_v6i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX8-NEXT: v_sub_u16_e32 v4, 0, v3
-; GFX8-NEXT: v_max_i16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
-; GFX8-NEXT: v_sub_u16_e32 v5, 0, v4
-; GFX8-NEXT: v_max_i16_sdwa v4, v4, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX8-NEXT: v_sub_u16_e32 v6, 0, v5
-; GFX8-NEXT: v_max_i16_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v3, 0
+; GFX8-NEXT: v_sub_u16_sdwa v4, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v5, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v6, 0, v2
; GFX8-NEXT: v_sub_u16_e32 v7, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v8, 0, v0
+; GFX8-NEXT: v_max_i16_sdwa v4, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v8
; GFX8-NEXT: v_max_i16_e32 v1, v1, v7
; GFX8-NEXT: v_max_i16_e32 v2, v2, v6
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v6i16:
@@ -572,30 +569,27 @@ define <8 x i16> @v_abs_v8i16(<8 x i16> %arg) {
; GFX8-LABEL: v_abs_v8i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3
-; GFX8-NEXT: v_sub_u16_e32 v5, 0, v4
-; GFX8-NEXT: v_max_i16_sdwa v4, v4, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX8-NEXT: v_sub_u16_e32 v6, 0, v5
-; GFX8-NEXT: v_max_i16_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v1
-; GFX8-NEXT: v_sub_u16_e32 v7, 0, v6
-; GFX8-NEXT: v_max_i16_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v0
-; GFX8-NEXT: v_sub_u16_e32 v8, 0, v7
-; GFX8-NEXT: v_max_i16_sdwa v7, v7, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v4, 0
+; GFX8-NEXT: v_sub_u16_sdwa v5, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v6, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v7, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v8, 0, v3
; GFX8-NEXT: v_sub_u16_e32 v9, 0, v2
; GFX8-NEXT: v_sub_u16_e32 v10, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v11, 0, v0
+; GFX8-NEXT: v_max_i16_sdwa v5, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v6, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v4, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v11
; GFX8-NEXT: v_max_i16_e32 v1, v1, v10
; GFX8-NEXT: v_max_i16_e32 v2, v2, v9
; GFX8-NEXT: v_max_i16_e32 v3, v3, v8
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v7
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v6
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v5
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
+; GFX8-NEXT: v_or_b32_e32 v3, v3, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v8i16:
@@ -820,30 +814,15 @@ define <16 x i16> @v_abs_v16i16(<16 x i16> %arg) {
; GFX8-LABEL: v_abs_v16i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v7
-; GFX8-NEXT: v_sub_u16_e32 v9, 0, v8
-; GFX8-NEXT: v_max_i16_sdwa v8, v8, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v6
-; GFX8-NEXT: v_sub_u16_e32 v10, 0, v9
-; GFX8-NEXT: v_max_i16_sdwa v9, v9, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v5
-; GFX8-NEXT: v_sub_u16_e32 v11, 0, v10
-; GFX8-NEXT: v_max_i16_sdwa v10, v10, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v4
-; GFX8-NEXT: v_sub_u16_e32 v12, 0, v11
-; GFX8-NEXT: v_max_i16_sdwa v11, v11, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v3
-; GFX8-NEXT: v_sub_u16_e32 v13, 0, v12
-; GFX8-NEXT: v_max_i16_sdwa v12, v12, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v13, 16, v2
-; GFX8-NEXT: v_sub_u16_e32 v14, 0, v13
-; GFX8-NEXT: v_max_i16_sdwa v13, v13, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v14, 16, v1
-; GFX8-NEXT: v_sub_u16_e32 v15, 0, v14
-; GFX8-NEXT: v_max_i16_sdwa v14, v14, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v15, 16, v0
-; GFX8-NEXT: v_sub_u16_e32 v16, 0, v15
-; GFX8-NEXT: v_max_i16_sdwa v15, v15, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v8, 0
+; GFX8-NEXT: v_sub_u16_sdwa v9, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v10, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v11, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v12, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v13, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v14, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v15, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v16, 0, v7
; GFX8-NEXT: v_sub_u16_e32 v17, 0, v6
; GFX8-NEXT: v_sub_u16_e32 v18, 0, v5
@@ -852,6 +831,14 @@ define <16 x i16> @v_abs_v16i16(<16 x i16> %arg) {
; GFX8-NEXT: v_sub_u16_e32 v21, 0, v2
; GFX8-NEXT: v_sub_u16_e32 v22, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v23, 0, v0
+; GFX8-NEXT: v_max_i16_sdwa v9, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v10, v6, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v11, v5, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v12, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v13, v3, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v14, v2, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v15, v1, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v8, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v23
; GFX8-NEXT: v_max_i16_e32 v1, v1, v22
; GFX8-NEXT: v_max_i16_e32 v2, v2, v21
@@ -860,14 +847,14 @@ define <16 x i16> @v_abs_v16i16(<16 x i16> %arg) {
; GFX8-NEXT: v_max_i16_e32 v5, v5, v18
; GFX8-NEXT: v_max_i16_e32 v6, v6, v17
; GFX8-NEXT: v_max_i16_e32 v7, v7, v16
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v15
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v14
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v13
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v12
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v11
-; GFX8-NEXT: v_or_b32_e32 v5, v5, v10
-; GFX8-NEXT: v_or_b32_e32 v6, v6, v9
-; GFX8-NEXT: v_or_b32_e32 v7, v7, v8
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v15
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v14
+; GFX8-NEXT: v_or_b32_e32 v3, v3, v13
+; GFX8-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX8-NEXT: v_or_b32_e32 v5, v5, v11
+; GFX8-NEXT: v_or_b32_e32 v6, v6, v10
+; GFX8-NEXT: v_or_b32_e32 v7, v7, v9
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v16i16:
@@ -1267,102 +1254,87 @@ define <32 x i16> @v_abs_v32i16(<32 x i16> %arg) {
; GFX8-LABEL: v_abs_v32i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v16, 16, v15
-; GFX8-NEXT: v_sub_u16_e32 v17, 0, v16
-; GFX8-NEXT: v_max_i16_sdwa v16, v16, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v14
-; GFX8-NEXT: v_sub_u16_e32 v18, 0, v17
-; GFX8-NEXT: v_max_i16_sdwa v17, v17, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v13
-; GFX8-NEXT: v_sub_u16_e32 v19, 0, v18
-; GFX8-NEXT: v_max_i16_sdwa v18, v18, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v19, 16, v12
-; GFX8-NEXT: v_sub_u16_e32 v20, 0, v19
-; GFX8-NEXT: v_max_i16_sdwa v19, v19, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v20, 16, v11
-; GFX8-NEXT: v_sub_u16_e32 v21, 0, v20
-; GFX8-NEXT: v_max_i16_sdwa v20, v20, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v10
-; GFX8-NEXT: v_sub_u16_e32 v22, 0, v21
-; GFX8-NEXT: v_max_i16_sdwa v21, v21, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v22, 16, v9
-; GFX8-NEXT: v_sub_u16_e32 v23, 0, v22
-; GFX8-NEXT: v_max_i16_sdwa v22, v22, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v8
-; GFX8-NEXT: v_sub_u16_e32 v24, 0, v23
-; GFX8-NEXT: v_max_i16_sdwa v23, v23, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v24, 16, v7
-; GFX8-NEXT: v_sub_u16_e32 v25, 0, v24
-; GFX8-NEXT: v_max_i16_sdwa v24, v24, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v25, 16, v6
-; GFX8-NEXT: v_sub_u16_e32 v26, 0, v25
-; GFX8-NEXT: v_max_i16_sdwa v25, v25, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v26, 16, v5
-; GFX8-NEXT: v_sub_u16_e32 v27, 0, v26
-; GFX8-NEXT: v_max_i16_sdwa v26, v26, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v27, 16, v4
-; GFX8-NEXT: v_sub_u16_e32 v28, 0, v27
-; GFX8-NEXT: v_max_i16_sdwa v27, v27, v28 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v28, 16, v3
-; GFX8-NEXT: v_sub_u16_e32 v29, 0, v28
-; GFX8-NEXT: v_max_i16_sdwa v28, v28, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v29, 16, v2
-; GFX8-NEXT: v_sub_u16_e32 v30, 0, v29
-; GFX8-NEXT: v_max_i16_sdwa v29, v29, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v30, 16, v1
-; GFX8-NEXT: v_sub_u16_e32 v31, 0, v30
-; GFX8-NEXT: v_max_i16_sdwa v30, v30, v31 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshrrev_b32_e32 v31, 16, v0
-; GFX8-NEXT: v_sub_u16_e32 v32, 0, v31
-; GFX8-NEXT: v_max_i16_sdwa v31, v31, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_sub_u16_e32 v32, 0, v0
-; GFX8-NEXT: v_max_i16_e32 v0, v0, v32
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v31
-; GFX8-NEXT: v_sub_u16_e32 v31, 0, v1
-; GFX8-NEXT: v_max_i16_e32 v1, v1, v31
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v30
-; GFX8-NEXT: v_sub_u16_e32 v30, 0, v2
-; GFX8-NEXT: v_max_i16_e32 v2, v2, v30
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v29
-; GFX8-NEXT: v_sub_u16_e32 v29, 0, v3
-; GFX8-NEXT: v_max_i16_e32 v3, v3, v29
-; GFX8-NEXT: v_or_b32_e32 v3, v3, v28
-; GFX8-NEXT: v_sub_u16_e32 v28, 0, v4
-; GFX8-NEXT: v_max_i16_e32 v4, v4, v28
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v27
-; GFX8-NEXT: v_sub_u16_e32 v27, 0, v5
-; GFX8-NEXT: v_max_i16_e32 v5, v5, v27
-; GFX8-NEXT: v_or_b32_e32 v5, v5, v26
-; GFX8-NEXT: v_sub_u16_e32 v26, 0, v6
-; GFX8-NEXT: v_max_i16_e32 v6, v6, v26
-; GFX8-NEXT: v_or_b32_e32 v6, v6, v25
-; GFX8-NEXT: v_sub_u16_e32 v25, 0, v7
-; GFX8-NEXT: v_max_i16_e32 v7, v7, v25
-; GFX8-NEXT: v_or_b32_e32 v7, v7, v24
-; GFX8-NEXT: v_sub_u16_e32 v24, 0, v8
-; GFX8-NEXT: v_max_i16_e32 v8, v8, v24
-; GFX8-NEXT: v_or_b32_e32 v8, v8, v23
-; GFX8-NEXT: v_sub_u16_e32 v23, 0, v9
-; GFX8-NEXT: v_max_i16_e32 v9, v9, v23
-; GFX8-NEXT: v_or_b32_e32 v9, v9, v22
-; GFX8-NEXT: v_sub_u16_e32 v22, 0, v10
-; GFX8-NEXT: v_max_i16_e32 v10, v10, v22
-; GFX8-NEXT: v_or_b32_e32 v10, v10, v21
-; GFX8-NEXT: v_sub_u16_e32 v21, 0, v11
-; GFX8-NEXT: v_max_i16_e32 v11, v11, v21
+; GFX8-NEXT: v_mov_b32_e32 v16, 0
+; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_e32 v20, 0, v0
+; GFX8-NEXT: v_max_i16_sdwa v19, v0, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v0, v0, v20
+; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v19
+; GFX8-NEXT: v_sub_u16_e32 v19, 0, v1
+; GFX8-NEXT: v_max_i16_sdwa v20, v1, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v1, v1, v19
+; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v20
+; GFX8-NEXT: v_sub_u16_e32 v20, 0, v2
+; GFX8-NEXT: v_max_i16_sdwa v19, v2, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v2, v2, v20
+; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v19
+; GFX8-NEXT: v_sub_u16_e32 v19, 0, v3
+; GFX8-NEXT: v_max_i16_sdwa v20, v3, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v3, v3, v19
+; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v3, v3, v20
+; GFX8-NEXT: v_sub_u16_e32 v20, 0, v4
+; GFX8-NEXT: v_max_i16_sdwa v19, v4, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v4, v4, v20
+; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v4, v4, v19
+; GFX8-NEXT: v_sub_u16_e32 v19, 0, v5
+; GFX8-NEXT: v_max_i16_sdwa v20, v5, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v5, v5, v19
+; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v5, v5, v20
+; GFX8-NEXT: v_sub_u16_e32 v20, 0, v6
+; GFX8-NEXT: v_max_i16_sdwa v19, v6, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v6, v6, v20
+; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v6, v6, v19
+; GFX8-NEXT: v_sub_u16_e32 v19, 0, v7
+; GFX8-NEXT: v_max_i16_sdwa v20, v7, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v7, v7, v19
+; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v7, v7, v20
+; GFX8-NEXT: v_sub_u16_e32 v20, 0, v8
+; GFX8-NEXT: v_max_i16_sdwa v19, v8, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v8, v8, v20
+; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v8, v8, v19
+; GFX8-NEXT: v_sub_u16_e32 v19, 0, v9
+; GFX8-NEXT: v_max_i16_sdwa v20, v9, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v9, v9, v19
+; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v9, v9, v20
+; GFX8-NEXT: v_sub_u16_e32 v20, 0, v10
+; GFX8-NEXT: v_max_i16_sdwa v19, v10, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v10, v10, v20
+; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v10, v10, v19
+; GFX8-NEXT: v_sub_u16_e32 v19, 0, v11
+; GFX8-NEXT: v_max_i16_sdwa v20, v11, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v11, v11, v19
+; GFX8-NEXT: v_sub_u16_sdwa v17, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v16, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v11, v11, v20
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v12
+; GFX8-NEXT: v_max_i16_sdwa v16, v12, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v12, v12, v20
-; GFX8-NEXT: v_or_b32_e32 v12, v12, v19
-; GFX8-NEXT: v_sub_u16_e32 v19, 0, v13
+; GFX8-NEXT: v_or_b32_e32 v12, v12, v16
+; GFX8-NEXT: v_sub_u16_e32 v16, 0, v13
+; GFX8-NEXT: v_max_i16_sdwa v19, v13, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v15
-; GFX8-NEXT: v_max_i16_e32 v13, v13, v19
-; GFX8-NEXT: v_sub_u16_e32 v19, 0, v14
-; GFX8-NEXT: v_max_i16_e32 v14, v14, v19
+; GFX8-NEXT: v_max_i16_e32 v13, v13, v16
+; GFX8-NEXT: v_sub_u16_e32 v16, 0, v14
+; GFX8-NEXT: v_max_i16_sdwa v17, v15, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v18, v14, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_e32 v14, v14, v16
; GFX8-NEXT: v_max_i16_e32 v15, v15, v20
-; GFX8-NEXT: v_or_b32_e32 v13, v13, v18
-; GFX8-NEXT: v_or_b32_e32 v14, v14, v17
-; GFX8-NEXT: v_or_b32_e32 v15, v15, v16
+; GFX8-NEXT: v_or_b32_e32 v13, v13, v19
+; GFX8-NEXT: v_or_b32_e32 v14, v14, v18
+; GFX8-NEXT: v_or_b32_e32 v15, v15, v17
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v32i16:
More information about the llvm-commits mailing list