[llvm] [AMDGPU] Add Srl combine for extracting last element of BUILD_VECTOR (PR #181412)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 12:13:04 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Frederick Vu (FrederickVu)
<details>
<summary>Changes</summary>
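While working on another combine, I noticed redundant zext shift pairs (`v_lshrrev_b32` followed by `v_lshlrev_b32`) that come from a `build_vector(undef, x)` created by `TargetLowering::SimplifyDemandedBits` together with a `srl` created by `lowerEXTRACT_VECTOR_ELT`. This patch adds an `srl` combine that folds `(srl (bitcast (build_vector e0, ..., eN)), N * eltsize)` into `(zext eN)`.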
---
Patch is 1.07 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/181412.diff
11 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+18)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll (+4525-4532)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll (+267-265)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll (+796-814)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll (+1754-1765)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll (+76-81)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll (+154-161)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll (+76-132)
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll (+32-76)
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll (+202-482)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-lshl_add.ll (+17-37)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index da21033388532..eb6d05d1c0951 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4396,6 +4396,24 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
}
}
}
+
+ // fold (srl (bitcast (build_vector e0, ..., eN)), N * eltsize) -> (zext eN)
+ if (VT.isScalarInteger()) {
+ SDValue BV = peekThroughBitcasts(LHS);
+ if (BV.getOpcode() == ISD::BUILD_VECTOR) {
+ EVT BVVT = BV.getValueType();
+ unsigned EltSizeInBits = BVVT.getScalarSizeInBits();
+ unsigned NumElts = BVVT.getVectorNumElements();
+ if (RHSVal == (NumElts - 1) * EltSizeInBits) {
+ SDValue LastElt = BV.getOperand(NumElts - 1);
+ if (!LastElt.isUndef()) {
+ EVT IntEltVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits);
+ return DAG.getZExtOrTrunc(DAG.getBitcast(IntEltVT, LastElt), SL,
+ VT);
+ }
+ }
+ }
+ }
}
if (VT.getScalarType() != MVT::i64)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index 1b1f7fcadc540..16dfd6d37c3cb 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -147321,418 +147321,411 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:4
+; GFX9-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:4
+; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_dword v57, off, s[0:3], s32
-; GFX9-NEXT: v_mov_b32_e32 v54, v0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
+; GFX9-NEXT: buffer_load_dword v60, off, s[0:3], s32
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr0
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: v_mov_b32_e32 v45, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr35
; GFX9-NEXT: ; kill: killed $vgpr0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr35
; GFX9-NEXT: ; kill: killed $vgpr0
; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr34
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: v_mov_b32_e32 v56, v15
; GFX9-NEXT: ; kill: killed $vgpr0
; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr34
-; GFX9-NEXT: ; implicit-def: $vgpr34
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr35
; GFX9-NEXT: ; kill: killed $vgpr0
; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr34
-; GFX9-NEXT: ; implicit-def: $vgpr34
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr43
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: v_mov_b32_e32 v57, v16
; GFX9-NEXT: ; kill: killed $vgpr0
; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; kill: killed $vgpr34
-; GFX9-NEXT: ; implicit-def: $vgpr34
+; GFX9-NEXT: ; implicit-def: $vgpr16
+; GFX9-NEXT: ; implicit-def: $vgpr44
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; implicit-def: $vgpr44
-; GFX9-NEXT: ; implicit-def: $vgpr42
-; GFX9-NEXT: ; implicit-def: $vgpr51
-; GFX9-NEXT: ; implicit-def: $vgpr41
-; GFX9-NEXT: ; implicit-def: $vgpr53
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr34
+; GFX9-NEXT: ; implicit-def: $vgpr36
+; GFX9-NEXT: ; kill: killed $vgpr43
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr55
; GFX9-NEXT: ; implicit-def: $vgpr40
+; GFX9-NEXT: ; implicit-def: $vgpr54
; GFX9-NEXT: ; implicit-def: $vgpr32
-; GFX9-NEXT: ; implicit-def: $vgpr56
-; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: ; kill: killed $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr39
-; GFX9-NEXT: ; implicit-def: $vgpr63
; GFX9-NEXT: ; implicit-def: $vgpr0
-; GFX9-NEXT: ; implicit-def: $vgpr35
-; GFX9-NEXT: ; implicit-def: $vgpr46
-; GFX9-NEXT: ; implicit-def: $vgpr45
-; GFX9-NEXT: ; implicit-def: $vgpr37
-; GFX9-NEXT: ; implicit-def: $vgpr36
+; GFX9-NEXT: ; implicit-def: $vgpr49
+; GFX9-NEXT: ; implicit-def: $vgpr53
+; GFX9-NEXT: ; implicit-def: $vgpr47
; GFX9-NEXT: ; implicit-def: $vgpr59
-; GFX9-NEXT: ; implicit-def: $vgpr38
-; GFX9-NEXT: ; implicit-def: $vgpr60
+; GFX9-NEXT: ; implicit-def: $vgpr42
+; GFX9-NEXT: ; implicit-def: $vgpr37
; GFX9-NEXT: ; implicit-def: $vgpr48
-; GFX9-NEXT: ; implicit-def: $vgpr50
-; GFX9-NEXT: ; implicit-def: $vgpr49
+; GFX9-NEXT: ; implicit-def: $vgpr38
+; GFX9-NEXT: ; implicit-def: $vgpr63
+; GFX9-NEXT: ; implicit-def: $vgpr41
+; GFX9-NEXT: ; implicit-def: $vgpr62
+; GFX9-NEXT: ; implicit-def: $vgpr39
; GFX9-NEXT: ; implicit-def: $vgpr52
-; GFX9-NEXT: ; implicit-def: $vgpr55
-; GFX9-NEXT: ; implicit-def: $vgpr47
-; GFX9-NEXT: ; kill: killed $vgpr34
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GFX9-NEXT: ; kill: killed $vgpr34
+; GFX9-NEXT: ; implicit-def: $vgpr34
+; GFX9-NEXT: ; implicit-def: $vgpr46
+; GFX9-NEXT: ; kill: killed $vgpr36
+; GFX9-NEXT: ; kill: killed $vgpr43
+; GFX9-NEXT: ; kill: killed $vgpr35
+; GFX9-NEXT: ; implicit-def: $vgpr50
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr35
+; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(33)
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-NEXT: s_waitcnt vmcnt(29)
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31
; GFX9-NEXT: ; implicit-def: $vgpr31
+; GFX9-NEXT: ; kill: killed $vgpr31
+; GFX9-NEXT: ; implicit-def: $vgpr31
+; GFX9-NEXT: ; kill: killed $vgpr31
+; GFX9-NEXT: ; implicit-def: $vgpr31
+; GFX9-NEXT: ; kill: killed $vgpr31
+; GFX9-NEXT: ; implicit-def: $vgpr31
+; GFX9-NEXT: ; kill: killed $vgpr31
+; GFX9-NEXT: ; implicit-def: $vgpr31
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB90_2
; GFX9-NEXT: ; %bb.1: ; %cmp.false
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v16
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v16
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v16
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v15
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v15
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-NEXT: v_lshrrev_b64 v[58:59], 24, v[56:57]
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v57
+; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-NEXT: v_lshrrev_b64 v[58:59], 24, v[13:14]
+; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-NEXT: v_lshrrev_b64 v[58:59], 24, v[11:12]
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v57
+; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-NEXT: v_lshrrev_b64 v[58:59], 24, v[9:10]
+; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v4
+; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-NEXT: v_lshrrev_b64 v[58:59], 24, v[7:8]
+; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dw...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/181412
More information about the llvm-commits mailing list