[llvm] [AMDGPU] Add Srl combine for extracting last element of BUILD_VECTOR (PR #181412)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 13 12:13:04 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Frederick Vu (FrederickVu)

<details>
<summary>Changes</summary>

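While working on another combine, I noticed some redundant zext shift pairs (`v_lshrrev_b32` + `v_lshlrev_b32`) in the generated code. They come from a `build_vector(undef, x)` created by `TargetLowering::SimplifyDemandedBits` being combined with a `srl` created by `lowerEXTRACT_VECTOR_ELT`. This patch adds a fold to `performSrlCombine` that extracts the last element of the `BUILD_VECTOR` directly: `(srl (bitcast (build_vector e0, ..., eN)), N * eltsize) -> (zext eN)`.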

---

Patch is 1.07 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/181412.diff


11 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+18) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll (+4525-4532) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll (+267-265) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll (+796-814) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll (+1754-1765) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll (+76-81) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll (+154-161) 
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll (+76-132) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll (+32-76) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll (+202-482) 
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-lshl_add.ll (+17-37) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index da21033388532..eb6d05d1c0951 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4396,6 +4396,24 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
         }
       }
     }
+
+    // fold (srl (bitcast (build_vector e0, ..., eN)), N * eltsize) -> (zext eN)
+    if (VT.isScalarInteger()) {
+      SDValue BV = peekThroughBitcasts(LHS);
+      if (BV.getOpcode() == ISD::BUILD_VECTOR) {
+        EVT BVVT = BV.getValueType();
+        unsigned EltSizeInBits = BVVT.getScalarSizeInBits();
+        unsigned NumElts = BVVT.getVectorNumElements();
+        if (RHSVal == (NumElts - 1) * EltSizeInBits) {
+          SDValue LastElt = BV.getOperand(NumElts - 1);
+          if (!LastElt.isUndef()) {
+            EVT IntEltVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits);
+            return DAG.getZExtOrTrunc(DAG.getBitcast(IntEltVT, LastElt), SL,
+                                      VT);
+          }
+        }
+      }
+    }
   }
 
   if (VT.getScalarType() != MVT::i64)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index 1b1f7fcadc540..16dfd6d37c3cb 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -147321,418 +147321,411 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
 ; GFX9-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:4
+; GFX9-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:4
+; GFX9-NEXT:    s_nop 0
 ; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:8
-; GFX9-NEXT:    buffer_load_dword v57, off, s[0:3], s32
-; GFX9-NEXT:    v_mov_b32_e32 v54, v0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
+; GFX9-NEXT:    buffer_load_dword v60, off, s[0:3], s32
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; kill: killed $vgpr0
+; GFX9-NEXT:    ; kill: killed $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    ; kill: killed $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    ; kill: killed $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    ; kill: killed $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    v_mov_b32_e32 v45, v0
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr35
 ; GFX9-NEXT:    ; kill: killed $vgpr0
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr43
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr35
 ; GFX9-NEXT:    ; kill: killed $vgpr0
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr34
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr43
+; GFX9-NEXT:    ; implicit-def: $vgpr43
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    v_mov_b32_e32 v56, v15
 ; GFX9-NEXT:    ; kill: killed $vgpr0
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr34
-; GFX9-NEXT:    ; implicit-def: $vgpr34
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    ; kill: killed $vgpr43
+; GFX9-NEXT:    ; implicit-def: $vgpr43
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr35
 ; GFX9-NEXT:    ; kill: killed $vgpr0
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr34
-; GFX9-NEXT:    ; implicit-def: $vgpr34
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
+; GFX9-NEXT:    ; kill: killed $vgpr15
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    ; kill: killed $vgpr43
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr43
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    v_mov_b32_e32 v57, v16
 ; GFX9-NEXT:    ; kill: killed $vgpr0
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; kill: killed $vgpr34
-; GFX9-NEXT:    ; implicit-def: $vgpr34
+; GFX9-NEXT:    ; implicit-def: $vgpr16
+; GFX9-NEXT:    ; implicit-def: $vgpr44
 ; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    ; implicit-def: $vgpr44
-; GFX9-NEXT:    ; implicit-def: $vgpr42
-; GFX9-NEXT:    ; implicit-def: $vgpr51
-; GFX9-NEXT:    ; implicit-def: $vgpr41
-; GFX9-NEXT:    ; implicit-def: $vgpr53
+; GFX9-NEXT:    ; kill: killed $vgpr15
+; GFX9-NEXT:    ; implicit-def: $vgpr34
+; GFX9-NEXT:    ; implicit-def: $vgpr36
+; GFX9-NEXT:    ; kill: killed $vgpr43
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr43
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    ; implicit-def: $vgpr55
 ; GFX9-NEXT:    ; implicit-def: $vgpr40
+; GFX9-NEXT:    ; implicit-def: $vgpr54
 ; GFX9-NEXT:    ; implicit-def: $vgpr32
-; GFX9-NEXT:    ; implicit-def: $vgpr56
-; GFX9-NEXT:    ; implicit-def: $vgpr43
 ; GFX9-NEXT:    ; kill: killed $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr39
-; GFX9-NEXT:    ; implicit-def: $vgpr63
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
-; GFX9-NEXT:    ; implicit-def: $vgpr35
-; GFX9-NEXT:    ; implicit-def: $vgpr46
-; GFX9-NEXT:    ; implicit-def: $vgpr45
-; GFX9-NEXT:    ; implicit-def: $vgpr37
-; GFX9-NEXT:    ; implicit-def: $vgpr36
+; GFX9-NEXT:    ; implicit-def: $vgpr49
+; GFX9-NEXT:    ; implicit-def: $vgpr53
+; GFX9-NEXT:    ; implicit-def: $vgpr47
 ; GFX9-NEXT:    ; implicit-def: $vgpr59
-; GFX9-NEXT:    ; implicit-def: $vgpr38
-; GFX9-NEXT:    ; implicit-def: $vgpr60
+; GFX9-NEXT:    ; implicit-def: $vgpr42
+; GFX9-NEXT:    ; implicit-def: $vgpr37
 ; GFX9-NEXT:    ; implicit-def: $vgpr48
-; GFX9-NEXT:    ; implicit-def: $vgpr50
-; GFX9-NEXT:    ; implicit-def: $vgpr49
+; GFX9-NEXT:    ; implicit-def: $vgpr38
+; GFX9-NEXT:    ; implicit-def: $vgpr63
+; GFX9-NEXT:    ; implicit-def: $vgpr41
+; GFX9-NEXT:    ; implicit-def: $vgpr62
+; GFX9-NEXT:    ; implicit-def: $vgpr39
 ; GFX9-NEXT:    ; implicit-def: $vgpr52
-; GFX9-NEXT:    ; implicit-def: $vgpr55
-; GFX9-NEXT:    ; implicit-def: $vgpr47
-; GFX9-NEXT:    ; kill: killed $vgpr34
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; kill: killed $vgpr33
 ; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; kill: killed $vgpr34
+; GFX9-NEXT:    ; implicit-def: $vgpr34
+; GFX9-NEXT:    ; implicit-def: $vgpr46
+; GFX9-NEXT:    ; kill: killed $vgpr36
+; GFX9-NEXT:    ; kill: killed $vgpr43
+; GFX9-NEXT:    ; kill: killed $vgpr35
+; GFX9-NEXT:    ; implicit-def: $vgpr50
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr15
+; GFX9-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr35
+; GFX9-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-NEXT:    ; implicit-def: $vgpr33
-; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-NEXT:    ; implicit-def: $vgpr58
+; GFX9-NEXT:    ; implicit-def: $vgpr43
+; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_waitcnt vmcnt(33)
+; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_waitcnt vmcnt(29)
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v31
 ; GFX9-NEXT:    ; implicit-def: $vgpr31
+; GFX9-NEXT:    ; kill: killed $vgpr31
+; GFX9-NEXT:    ; implicit-def: $vgpr31
+; GFX9-NEXT:    ; kill: killed $vgpr31
+; GFX9-NEXT:    ; implicit-def: $vgpr31
+; GFX9-NEXT:    ; kill: killed $vgpr31
+; GFX9-NEXT:    ; implicit-def: $vgpr31
+; GFX9-NEXT:    ; kill: killed $vgpr31
+; GFX9-NEXT:    ; implicit-def: $vgpr31
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GFX9-NEXT:    s_cbranch_execz .LBB90_2
 ; GFX9-NEXT:  ; %bb.1: ; %cmp.false
-; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v16
-; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
-; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v16
-; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
-; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v16
-; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
-; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v15
-; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v15
-; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-NEXT:    v_lshrrev_b64 v[58:59], 24, v[56:57]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v57
+; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-NEXT:    v_lshrrev_b64 v[58:59], 24, v[13:14]
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-NEXT:    v_lshrrev_b64 v[58:59], 24, v[11:12]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v57
+; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-NEXT:    v_lshrrev_b64 v[58:59], 24, v[9:10]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v33, 24, v4
+; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-NEXT:    v_lshrrev_b64 v[58:59], 24, v[7:8]
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    buffer_store_dw...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/181412


More information about the llvm-commits mailing list