[llvm] 5044531 - [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM (#65547)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 15:13:46 PDT 2023
Author: Jeffrey Byrnes
Date: 2023-09-07T15:13:42-07:00
New Revision: 5044531afd4b4f03ad2ea593481a879564bc05f0
URL: https://github.com/llvm/llvm-project/commit/5044531afd4b4f03ad2ea593481a879564bc05f0
DIFF: https://github.com/llvm/llvm-project/commit/5044531afd4b4f03ad2ea593481a879564bc05f0.diff
LOG: [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM (#65547)
On its own, this patch has limited effect; however, it is a prerequisite
for upcoming commits that build on it.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/permute_i8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9c69ea45939183..85c9ed489e926c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10762,6 +10762,24 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
StartingIndex, Index);
}
+ case AMDGPUISD::PERM: {
+ auto PermMask = dyn_cast<ConstantSDNode>(Op->getOperand(2));
+ if (!PermMask)
+ return std::nullopt;
+
+ auto IdxMask =
+ (PermMask->getZExtValue() & (0xFF << (Index * 8))) >> (Index * 8);
+ if (IdxMask > 0x07 && IdxMask != 0x0c)
+ return std::nullopt;
+
+ auto NextOp = Op.getOperand(IdxMask > 0x03 ? 0 : 1);
+ auto NextIndex = IdxMask > 0x03 ? IdxMask % 4 : IdxMask;
+
+ return IdxMask != 0x0c ? calculateSrcByte(NextOp, StartingIndex, NextIndex)
+ : ByteProvider<SDValue>(
+ ByteProvider<SDValue>::getConstantZero());
+ }
+
default: {
return std::nullopt;
}
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index 2d8a64e6bcbc80..c71f69edc76fa6 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -2794,6 +2794,41 @@ define hidden void @extract3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, pt
ret void
}
+declare i32 @llvm.amdgcn.perm(i32, i32, i32)
+
+define hidden void @extract_perm_3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
+; GFX10-LABEL: extract_perm_3744:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v6, v[0:1], off
+; GFX10-NEXT: global_load_dword v7, v[2:3], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x3070404
+; GFX10-NEXT: global_store_dword v[4:5], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_perm_3744:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v6, v[0:1], off
+; GFX9-NEXT: global_load_dword v7, v[2:3], off
+; GFX9-NEXT: s_mov_b32 s4, 0x3070404
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_perm_b32 v0, v6, v7, s4
+; GFX9-NEXT: global_store_dword v[4:5], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec1 = load <4 x i8>, ptr addrspace(1) %in0, align 4
+ %vec2 = load <4 x i8>, ptr addrspace(1) %in1, align 4
+ %cast1 = bitcast <4 x i8> %vec1 to i32
+ %cast2 = bitcast <4 x i8> %vec2 to i32
+ %lo24 = call i32 @llvm.amdgcn.perm(i32 %cast1, i32 %cast1, i32 201523200)
+ %hi8 = call i32 @llvm.amdgcn.perm(i32 %cast2, i32 %cast2, i32 51121164)
+ %res = or i32 %hi8, %lo24
+ store i32 %res, ptr addrspace(1) %out0, align 4
+ ret void
+}
+
define hidden void @extract1347_v2i16(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
; GFX10-LABEL: extract1347_v2i16:
; GFX10: ; %bb.0:
More information about the llvm-commits mailing list