[llvm] [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM (PR #65547)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 6 16:07:40 PDT 2023


https://github.com/jrbyrnes created https://github.com/llvm/llvm-project/pull/65547:

As a standalone patch, it has limited effect. However, it is necessary as it supports upcoming commits.

>From 9f3a4f763edaf569fc20239b628e0a79ab450758 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 6 Sep 2023 15:51:12 -0700
Subject: [PATCH] [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM

Change-Id: I4a9e0f7d7cbe01719e6bab24387b6bb2cb93315b
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 18 ++++++++++++
 llvm/test/CodeGen/AMDGPU/permute_i8.ll    | 35 +++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 805c39ecbd46d43..2eb8c61b87ea52e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10762,6 +10762,24 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
                             StartingIndex, Index);
   }
 
+  case AMDGPUISD::PERM: {
+    auto PermMask = dyn_cast<ConstantSDNode>(Op->getOperand(2));
+    if (!PermMask)
+      return std::nullopt;
+
+    auto IdxMask =
+        (PermMask->getZExtValue() & (0xFF << (Index * 8))) >> (Index * 8);
+    if (IdxMask > 0x07 && IdxMask != 0x0c)
+      return std::nullopt;
+
+    auto NextOp = IdxMask > 0x03 ? Op.getOperand(0) : Op.getOperand(1);
+    auto NextIndex = IdxMask > 0x03 ? IdxMask % 4 : IdxMask;
+
+    return IdxMask != 0x0c ? calculateSrcByte(NextOp, StartingIndex, NextIndex)
+                           : std::optional<ByteProvider<SDValue>>(
+                                 ByteProvider<SDValue>::getConstantZero());
+  }
+
   default: {
     return std::nullopt;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index 2d8a64e6bcbc80d..c71f69edc76fa6e 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -2794,6 +2794,41 @@ define hidden void @extract3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, pt
   ret void
 }
 
+declare i32 @llvm.amdgcn.perm(i32, i32, i32)
+
+define hidden void @extract_perm_3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
+; GFX10-LABEL: extract_perm_3744:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v6, v[0:1], off
+; GFX10-NEXT:    global_load_dword v7, v[2:3], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_perm_b32 v0, v6, v7, 0x3070404
+; GFX10-NEXT:    global_store_dword v[4:5], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_perm_3744:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v6, v[0:1], off
+; GFX9-NEXT:    global_load_dword v7, v[2:3], off
+; GFX9-NEXT:    s_mov_b32 s4, 0x3070404
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_perm_b32 v0, v6, v7, s4
+; GFX9-NEXT:    global_store_dword v[4:5], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %vec1 = load <4 x i8>, ptr addrspace(1) %in0, align 4
+  %vec2 = load <4 x i8>, ptr addrspace(1) %in1, align 4
+  %cast1 = bitcast <4 x i8> %vec1 to i32
+  %cast2 = bitcast <4 x i8> %vec2 to i32
+  %lo24 = call i32 @llvm.amdgcn.perm(i32 %cast1, i32 %cast1, i32 201523200)
+  %hi8 = call i32 @llvm.amdgcn.perm(i32 %cast2, i32 %cast2, i32 51121164)
+  %res = or i32 %hi8, %lo24
+  store i32 %res, ptr addrspace(1) %out0, align 4
+  ret void
+}
+
 define hidden void @extract1347_v2i16(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
 ; GFX10-LABEL: extract1347_v2i16:
 ; GFX10:       ; %bb.0:



More information about the llvm-commits mailing list