[PATCH] D34655: [AMDGPU] Add 2 new alignbit patterns

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 26 17:13:56 PDT 2017


rampitec created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.

https://reviews.llvm.org/D34655

Files:
  lib/Target/AMDGPU/SIInstructions.td
  test/CodeGen/AMDGPU/alignbit-pat.ll


Index: test/CodeGen/AMDGPU/alignbit-pat.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/alignbit-pat.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}alignbit_shr_pat:
+; GCN-DAG: s_load_dword s[[SHR:[0-9]+]]
+; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
+
+define amdgpu_kernel void @alignbit_shr_pat(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+  %tmp = load i64, i64 addrspace(1)* %arg, align 8
+  %tmp3 = and i32 %arg2, 31
+  %tmp4 = zext i32 %tmp3 to i64
+  %tmp5 = lshr i64 %tmp, %tmp4
+  %tmp6 = trunc i64 %tmp5 to i32
+  store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}alignbit_shl_pat:
+; GCN-DAG: s_load_dword s[[SHL:[0-9]+]]
+; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_sub_i32 s[[SHR:[0-9]+]], 32, s[[SHL]]
+; GCN:     v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
+
+define amdgpu_kernel void @alignbit_shl_pat(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+  %tmp = load i64, i64 addrspace(1)* %arg, align 8
+  %tmp3 = and i32 %arg2, 31
+  %tmp4 = zext i32 %tmp3 to i64
+  %tmp5 = shl i64 %tmp, %tmp4
+  %tmp6 = trunc i64 %tmp5 to i32
+  store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+  ret void
+}
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -929,6 +929,15 @@
 defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
 def : ROTRPattern <V_ALIGNBIT_B32>;
 
+def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
+          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
+def : Pat<(i32 (trunc (shl i64:$src0, (and i32:$src1, (i32 31))))),
+          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
+                          (S_SUB_I32 (i32 32), $src1))>;
+
 /********** ====================== **********/
 /**********   Indirect addressing  **********/
 /********** ====================== **********/


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34655.104047.patch
Type: text/x-patch
Size: 2456 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170627/9a6196d2/attachment.bin>


More information about the llvm-commits mailing list