[llvm] 5044531 - [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM (#65547)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 7 15:13:46 PDT 2023


Author: Jeffrey Byrnes
Date: 2023-09-07T15:13:42-07:00
New Revision: 5044531afd4b4f03ad2ea593481a879564bc05f0

URL: https://github.com/llvm/llvm-project/commit/5044531afd4b4f03ad2ea593481a879564bc05f0
DIFF: https://github.com/llvm/llvm-project/commit/5044531afd4b4f03ad2ea593481a879564bc05f0.diff

LOG: [AMDGPU] Teach CalculateByteProvider about AMDGPUISD::PERM (#65547)

As a standalone patch, it has limited effect. However, it is necessary
as it supports upcoming commits.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/permute_i8.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9c69ea45939183..85c9ed489e926c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10762,6 +10762,24 @@ calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
                             StartingIndex, Index);
   }
 
+  case AMDGPUISD::PERM: {
+    auto PermMask = dyn_cast<ConstantSDNode>(Op->getOperand(2));
+    if (!PermMask)
+      return std::nullopt;
+
+    auto IdxMask =
+        (PermMask->getZExtValue() & (0xFF << (Index * 8))) >> (Index * 8);
+    if (IdxMask > 0x07 && IdxMask != 0x0c)
+      return std::nullopt;
+
+    auto NextOp = Op.getOperand(IdxMask > 0x03 ? 0 : 1);
+    auto NextIndex = IdxMask > 0x03 ? IdxMask % 4 : IdxMask;
+
+    return IdxMask != 0x0c ? calculateSrcByte(NextOp, StartingIndex, NextIndex)
+                           : ByteProvider<SDValue>(
+                                 ByteProvider<SDValue>::getConstantZero());
+  }
+
   default: {
     return std::nullopt;
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index 2d8a64e6bcbc80..c71f69edc76fa6 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -2794,6 +2794,41 @@ define hidden void @extract3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, pt
   ret void
 }
 
+declare i32 @llvm.amdgcn.perm(i32, i32, i32)
+
+define hidden void @extract_perm_3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
+; GFX10-LABEL: extract_perm_3744:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v6, v[0:1], off
+; GFX10-NEXT:    global_load_dword v7, v[2:3], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_perm_b32 v0, v6, v7, 0x3070404
+; GFX10-NEXT:    global_store_dword v[4:5], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_perm_3744:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v6, v[0:1], off
+; GFX9-NEXT:    global_load_dword v7, v[2:3], off
+; GFX9-NEXT:    s_mov_b32 s4, 0x3070404
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_perm_b32 v0, v6, v7, s4
+; GFX9-NEXT:    global_store_dword v[4:5], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %vec1 = load <4 x i8>, ptr addrspace(1) %in0, align 4
+  %vec2 = load <4 x i8>, ptr addrspace(1) %in1, align 4
+  %cast1 = bitcast <4 x i8> %vec1 to i32
+  %cast2 = bitcast <4 x i8> %vec2 to i32
+  %lo24 = call i32 @llvm.amdgcn.perm(i32 %cast1, i32 %cast1, i32 201523200)
+  %hi8 = call i32 @llvm.amdgcn.perm(i32 %cast2, i32 %cast2, i32 51121164)
+  %res = or i32 %hi8, %lo24
+  store i32 %res, ptr addrspace(1) %out0, align 4
+  ret void
+}
+
 define hidden void @extract1347_v2i16(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
 ; GFX10-LABEL: extract1347_v2i16:
 ; GFX10:       ; %bb.0:


        


More information about the llvm-commits mailing list