[llvm] [AMDGPU][True16][CodeGen] Add patterns to reduce intermediates (PR #162047)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 6 01:21:40 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Carl Ritson (perlfu)

<details>
<summary>Changes</summary>

Add patterns which reduce `or` operations to register sequences when combining i16 values into an i32.  This removes many intermediate VGPRs and reduces register pressure.

---

Patch is 1.16 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162047.diff


14 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+17) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll (+4156-5167) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll (+357-518) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll (+420-568) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll (+800-1134) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll (+36-84) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll (+1471-2011) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll (+210-245) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll (+149-246) 
- (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+6-14) 
- (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+15-19) 
- (modified) llvm/test/CodeGen/AMDGPU/function-args.ll (+74-116) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll (+59-93) 
- (modified) llvm/test/CodeGen/AMDGPU/idot4u.ll (+5-8) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index be084a952bc41..c7fa49a2e1d64 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3717,6 +3717,23 @@ def : GCNPat <
 } // End foreach Ty = ...
 } // End AddedComplexity = 1
 
+let True16Predicate = UseRealTrue16Insts in {
+def : GCNPat<
+  (i32 (DivergentBinFrag<or>
+    (i32 (zext i16:$src_lo)),
+    (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_32:$src_hi)))))
+  )),
+  (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16)
+>;
+def : GCNPat<
+  (i32 (DivergentBinFrag<or>
+    (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_32:$src_hi))))),
+    (i32 (zext i16:$src_lo))
+  )),
+  (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16)
+>;
+}
+
 let True16Predicate = UseRealTrue16Insts in
 def : GCNPat <
   (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))),
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index df9c97fa23722..117af9590ff6e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -6551,271 +6551,205 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v39.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v1.h, 0xff, v1.h
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v66.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v39.h, 0
 ; GFX11-TRUE16-NEXT:    v_and_b16 v2.l, 0xff, v2.l
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v1.l, v33.h
+; GFX11-TRUE16-NEXT:    v_and_b16 v2.h, 0xff, v2.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v1.l, v1.l, v33.h
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v162.l
 ; GFX11-TRUE16-NEXT:    v_or_b16 v1.h, v1.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v39.h
-; GFX11-TRUE16-NEXT:    v_and_b16 v2.h, 0xff, v2.h
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v161.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v3.l, 0xff, v3.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v35.h, 8, v160.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v3.h, 0xff, v3.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v1, v39, v1
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v2.l, v33.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v36.h, 8, v65.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v2.l, v2.l, v33.h
 ; GFX11-TRUE16-NEXT:    v_or_b16 v2.h, v2.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v160.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v65.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v3.l, v3.l, v35.h
 ; GFX11-TRUE16-NEXT:    v_and_b16 v4.l, 0xff, v4.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v4.h, 0xff, v4.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v2, v39, v2
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v3.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v3.h, v3.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v39.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v3.h, v3.h, v36.h
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v151.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v4.h, 0xff, v4.h
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v150.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v5.l, 0xff, v5.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v35.h, 8, v149.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v5.h, 0xff, v5.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v3, v39, v3
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v4.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v4.h, v4.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v4.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v149.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v64.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v36.h, 8, v64.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v6.l, 0xff, v6.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v37.h, 8, v148.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v4.l, v4.l, v33.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v4.h, v4.h, v34.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v5.l, v5.l, v35.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v5.h, v5.h, v36.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v6.l, v6.l, v37.h
 ; GFX11-TRUE16-NEXT:    v_and_b16 v6.h, 0xff, v6.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v4, v39, v4
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v5.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v5.h, v5.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v5.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v148.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v147.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v147.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v7.l, 0xff, v7.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v146.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v7.h, 0xff, v7.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v5, v39, v5
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v6.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v6.h, v6.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v6.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v146.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v54.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v35.h, 8, v54.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v8.l, 0xff, v8.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v36.h, 8, v145.l
 ; GFX11-TRUE16-NEXT:    v_and_b16 v8.h, 0xff, v8.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v6, v39, v6
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v7.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v7.h, v7.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v7.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v145.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v144.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v37.h, 8, v144.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v6.h, v6.h, v33.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v7.l, v7.l, v34.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v7.h, v7.h, v35.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v8.l, v8.l, v36.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v8.h, v8.h, v37.h
 ; GFX11-TRUE16-NEXT:    v_and_b16 v9.l, 0xff, v9.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v9.h, 0xff, v9.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v7, v39, v7
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v8.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v8.h, v8.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v8.l, v39.h
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v135.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v9.h, 0xff, v9.h
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v53.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v10.l, 0xff, v10.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v10.h, 0xff, v10.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v8, v39, v8
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v9.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v9.h, v9.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v9.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v134.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v133.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v11.l, 0xff, v11.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v11.h, 0xff, v11.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v9, v39, v9
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v10.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v10.h, v10.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v10.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v132.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v52.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v12.l, 0xff, v12.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v12.h, 0xff, v12.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v10, v39, v10
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v11.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v11.h, v11.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v11.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v131.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v130.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v13.l, 0xff, v13.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v13.h, 0xff, v13.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v11, v39, v11
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v12.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v12.h, v12.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v12.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v129.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v51.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v14.l, 0xff, v14.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v14.h, 0xff, v14.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v12, v39, v12
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v13.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v13.h, v13.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v13.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v128.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v119.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v15.l, 0xff, v15.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v15.h, 0xff, v15.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v13, v39, v13
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v14.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v14.h, v14.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v14.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v118.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v50.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v16.l, 0xff, v16.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v16.h, 0xff, v16.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v14, v39, v14
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v15.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v15.h, v15.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v15.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v117.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v116.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v17.l, 0xff, v17.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v17.h, 0xff, v17.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v15, v39, v15
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v16.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v16.h, v16.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v16.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v115.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v49.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v18.l, 0xff, v18.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v18.h, 0xff, v18.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v16, v39, v16
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v17.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v17.h, v17.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v17.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v114.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v113.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v19.l, 0xff, v19.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v19.h, 0xff, v19.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v17, v39, v17
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v18.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v18.h, v18.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v18.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v112.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v48.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v20.l, 0xff, v20.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v20.h, 0xff, v20.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v18, v39, v18
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v19.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v19.h, v19.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v19.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v103.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v102.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v21.l, 0xff, v21.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v21.h, 0xff, v21.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v19, v39, v19
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v20.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v20.h, v20.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v20.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v101.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v38.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v22.l, 0xff, v22.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v22.h, 0xff, v22.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v20, v39, v20
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v21.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v21.h, v21.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v21.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v100.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v99.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v23.l, 0xff, v23.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v23.h, 0xff, v23.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v21, v39, v21
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v22.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v22.h, v22.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v22.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v98.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v37.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v24.l, 0xff, v24.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v24.h, 0xff, v24.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v22, v39, v22
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v23.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v23.h, v23.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v23.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v97.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v96.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v25.l, 0xff, v25.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v25.h, 0xff, v25.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v23, v39, v23
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v24.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v24.h, v24.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v24.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v87.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v36.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v26.l, 0xff, v26.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v26.h, 0xff, v26.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v24, v39, v24
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v25.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v25.h, v25.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v25.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v86.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v85.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v27.l, 0xff, v27.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v27.h, 0xff, v27.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v25, v39, v25
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v26.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v26.h, v26.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v26.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v84.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v35.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v28.l, 0xff, v28.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v28.h, 0xff, v28.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v26, v39, v26
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v27.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v27.h, v27.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v27.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v83.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.h, 8, v82.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v29.l, 0xff, v29.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v29.h, 0xff, v29.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v27, v39, v27
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v28.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v28.h, v28.h, v34.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v28.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v81.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.l, 8, v34.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v30.l, 0xff, v30.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v30.h, 0xff, v30.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v28, v39, v28
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v29.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v29.h, v29.h, v34.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v29.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v80.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v34.l, 8, v71.l
-; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT:    v_and_b16 v31.l, 0xff, v31.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v31.h, 0xff, v31.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v29, v39, v29
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v30.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v30.h, v30.h, v34.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v30.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v70.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.l, 8, v33.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v32.l, 0xff, v32.l
-; GFX11-TRUE16-NEXT:    v_and_b16 v32.h, 0xff, v32.h
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v30, v39, v30
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v31.l, v33.h
-; GFX11-TRUE16-NEXT:    v_or_b16 v31.h, v31.h, v33.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v31.l, v39.h
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.l, 8, v69.l
-; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v33.h, 8, v68.l
 ; GFX11-TRUE16-NEXT:    s_clause 0x1
 ; GFX11-TRUE16-NEXT:    scratch_store_b128 v0, v[1:4], off
 ; GFX11-TRUE16-NEXT:    scratch_store_b128 v0, v[5:8], off offset:16
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v31, v39, v31
-; GFX11-TRUE16-NEXT:    v_or_b16 v39.l, v32.l, v33.l
-; GFX11-TRUE16-NEXT:    v_or_b16 v32.h, v32.h, v33.h
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v32.l, v39.h
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_or_b32_e32 v32, v39, v32
+; GFX11-TRUE16-NEXT:    v_and_b16 v2.h, 0xff, v10.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v3.l, 8, v133.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v3.h, 0xff, v11.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v4.l, 8, v132.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v4.h, 0xff, v11.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v5.l, 8, v52.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v5.h, 0xff, v12.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v6.l, 8, v131.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v6.h, 0xff, v12.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v7.l, 8, v130.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v10.l, 0xff, v10.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v35.h, 8, v134.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v1.l, v9.l, v33.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v1.h, v9.h, v34.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v2.h, v2.h, v3.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v3.l, v3.h, v4.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v3.h, v4.h, v5.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v4.l, v5.h, v6.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v4.h, v6.h, v7.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v5.l, 0xff, v13.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v5.h, 8, v129.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v6.l, 0xff, v13.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v6.h, 8, v51.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v7.l, 0xff, v14.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v7.h, 8, v128.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v8.l, 0xff, v14.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v8.h, 8, v119.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v9.l, 0xff, v15.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v9.h, 8, v118.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v2.l, v10.l, v35.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v5.l, v5.l, v5.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v5.h, v6.l, v6.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v6.l, v7.l, v7.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v6.h, v8.l, v8.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v7.l, v9.l, v9.h
+; GFX11-TRUE16-NEXT:    v_and_b16 v7.h, 0xff, v15.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v8.l, 8, v50.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v8.h, 0xff, v16.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v9.l, 8, v117.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v9.h, 0xff, v16.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v10.l, 8, v116.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v10.h, 0xff, v17.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v11.l, 8, v115.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v11.h, 0xff, v17.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v12.l, 8, v49.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v7.h, v7.h, v8.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v8.l, v8.h, v9.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v8.h, v9.h, v10.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v9.l, v10.h, v11.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v9.h, v11.h, v12.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v10.l, 0xff, v18.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v10.h, 8, v114.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v11.l, 0xff, v18.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v11.h, 8, v113.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v12.l, 0xff, v19.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v12.h, 8, v112.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v13.l, 0xff, v19.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v13.h, 8, v48.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v14.l, 0xff, v20.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v14.h, 8, v103.l
+; GFX11-TRUE16-NEXT:    v_or_b16 v10.l, v10.l, v10.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v10.h, v11.l, v11.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v11.l, v12.l, v12.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v11.h, v13.l, v13.h
+; GFX11-TRUE16-NEXT:    v_or_b16 v12.l, v14.l, v14.h
+; GFX11-TRUE16-NEXT:    v_and_b16 v12.h, 0xff, v20.h
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v13.l, 8, v102.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v13.h, 0xff, v21.l
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v14.l, 8, v101.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v14.h,...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/162047


More information about the llvm-commits mailing list