[llvm] [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM (PR #65547)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 12:18:34 PDT 2023
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/65547:
From dac044add8a640fa80ba3111484b4c3aa7211877 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 6 Sep 2023 15:51:12 -0700
Subject: [PATCH] [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM
Change-Id: I7ffca42eb53662e21f649540950660c076f66d9b
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 18 ++++++++++++
llvm/test/CodeGen/AMDGPU/permute_i8.ll | 35 +++++++++++++++++++++++
2 files changed, 53 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 805c39ecbd46d43..e6f9898c99689a4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10762,6 +10762,24 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
StartingIndex, Index);
}
+ case AMDGPUISD::PERM: {
+ auto PermMask = dyn_cast<ConstantSDNode>(Op->getOperand(2));
+ if (!PermMask)
+ return std::nullopt;
+
+ auto IdxMask =
+ (PermMask->getZExtValue() & (0xFF << (Index * 8))) >> (Index * 8);
+ if (IdxMask > 0x07 && IdxMask != 0x0c)
+ return std::nullopt;
+
+ auto NextOp = Op.getOperand(IdxMask > 0x03 ? 0 : 1);
+ auto NextIndex = IdxMask > 0x03 ? IdxMask % 4 : IdxMask;
+
+ return IdxMask != 0x0c ? calculateSrcByte(NextOp, StartingIndex, NextIndex)
+ : ByteProvider<SDValue>(
+ ByteProvider<SDValue>::getConstantZero());
+ }
+
default: {
return std::nullopt;
}
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index 2d8a64e6bcbc80d..c71f69edc76fa6e 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -2794,6 +2794,41 @@ define hidden void @extract3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, pt
ret void
}
+declare i32 @llvm.amdgcn.perm(i32, i32, i32)
+
+define hidden void @extract_perm_3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
+; GFX10-LABEL: extract_perm_3744:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v6, v[0:1], off
+; GFX10-NEXT: global_load_dword v7, v[2:3], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x3070404
+; GFX10-NEXT: global_store_dword v[4:5], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_perm_3744:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v6, v[0:1], off
+; GFX9-NEXT: global_load_dword v7, v[2:3], off
+; GFX9-NEXT: s_mov_b32 s4, 0x3070404
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_perm_b32 v0, v6, v7, s4
+; GFX9-NEXT: global_store_dword v[4:5], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec1 = load <4 x i8>, ptr addrspace(1) %in0, align 4
+ %vec2 = load <4 x i8>, ptr addrspace(1) %in1, align 4
+ %cast1 = bitcast <4 x i8> %vec1 to i32
+ %cast2 = bitcast <4 x i8> %vec2 to i32
+ %lo24 = call i32 @llvm.amdgcn.perm(i32 %cast1, i32 %cast1, i32 201523200)
+ %hi8 = call i32 @llvm.amdgcn.perm(i32 %cast2, i32 %cast2, i32 51121164)
+ %res = or i32 %hi8, %lo24
+ store i32 %res, ptr addrspace(1) %out0, align 4
+ ret void
+}
+
define hidden void @extract1347_v2i16(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
; GFX10-LABEL: extract1347_v2i16:
; GFX10: ; %bb.0:
More information about the llvm-commits mailing list