[llvm] Try to simplify select v32i4 case by legalizing v16i4 (PR #173328)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 22 18:07:56 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (Shoreshen)
<details>
<summary>Changes</summary>
For #<!-- -->160969: by marking `ISD::SELECT` as `Custom` for `v16i4` in the AMDGPU backend, the code generated for a `<32 x i4>` select is simplified as follows:
```
; %bb.0:
s_load_dwordx4 s[0:3], s[4:5], 0x0
s_load_dwordx2 s[6:7], s[4:5], 0x10
v_mov_b32_e32 v8, 0
s_waitcnt lgkmcnt(0)
global_load_dwordx4 v[0:3], v8, s[2:3]
global_load_dwordx4 v[4:7], v8, s[6:7]
s_load_dword s2, s[4:5], 0x18
s_waitcnt lgkmcnt(0)
s_cmp_eq_u32 s2, 0
s_cselect_b64 vcc, -1, 0
s_waitcnt vmcnt(0)
v_cndmask_b32_e32 v3, v7, v3, vcc
v_cndmask_b32_e32 v2, v6, v2, vcc
v_cndmask_b32_e32 v1, v5, v1, vcc
v_cndmask_b32_e32 v0, v4, v0, vcc
global_store_dwordx4 v8, v[0:3], s[0:1]
s_endpgm
```
---
Full diff: https://github.com/llvm/llvm-project/pull/173328.diff
3 Files Affected:
- (modified) llvm/include/llvm/CodeGen/ValueTypes.td (+2)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/select-vectors.ll (+15)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 74ea86774a8ee..3ef87dc38a85d 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -113,6 +113,8 @@ def v4096i1 : VTVec<4096, i1>; // 4096 x i1 vector value
def v128i2 : VTVec<128, i2>; // 128 x i2 vector value
def v256i2 : VTVec<256, i2>; // 256 x i2 vector value
+def v16i4 : VTVec<16, i4>; // 16 x i4 vector value
+def v32i4 : VTVec<32, i4>; // 32 x i4 vector value
def v64i4 : VTVec<64, i4>; // 64 x i4 vector value
def v128i4 : VTVec<128, i4>; // 128 x i4 vector value
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 102ca92856bae..03d4f9c09dc2a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -876,7 +876,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
{MVT::v4i16, MVT::v4f16, MVT::v4bf16, MVT::v2i8, MVT::v4i8,
MVT::v8i8, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
MVT::v16i16, MVT::v16f16, MVT::v16bf16, MVT::v32i16,
- MVT::v32f16, MVT::v32bf16},
+ MVT::v32f16, MVT::v32bf16, MVT::v16i4},
Custom);
setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
diff --git a/llvm/test/CodeGen/AMDGPU/select-vectors.ll b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
index e754f665c5f43..5e52b2fca32c8 100644
--- a/llvm/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
@@ -65,6 +65,21 @@ define amdgpu_kernel void @v_select_v16i8(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
+; GCN-LABEL: {{^}}v_select_v32i4:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v32i4(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(4) %b.ptr, i32 %c) #0 {
+ %a = load <32 x i4>, ptr addrspace(1) %a.ptr, align 2
+ %b = load <32 x i4>, ptr addrspace(4) %b.ptr, align 2
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <32 x i4> %a, <32 x i4> %b
+ store <32 x i4> %select, ptr addrspace(1) %out, align 2
+ ret void
+}
+
; GCN-LABEL: {{^}}select_v4i8:
; GFX89: s_cselect_b32
; GFX89-NOT: s_cselect_b32
``````````
</details>
https://github.com/llvm/llvm-project/pull/173328
More information about the llvm-commits
mailing list