[PATCH] D77827: [AMDGCN] Run LoadStoreVectorizer before CodeGenPrepare

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 9 14:11:01 PDT 2020


rampitec created this revision.
rampitec added reviewers: arsenm, yaxunl.
Herald added subscribers: kerbowa, hiraditya, tpr, nhaehnle, wdng, jvesely.

AMDGPUCodeGenPrepare widens some loads, which then prevents
vectorization of an otherwise vectorizable load pair.

Run the vectorizer pass before AMDGPUCodeGenPrepare to catch
this opportunity. The second run is still in place, as the passes
in between also create a lot of vectorization opportunities.


https://reviews.llvm.org/D77827

Files:
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
  llvm/test/CodeGen/AMDGPU/vectorize-loads.ll


Index: llvm/test/CodeGen/AMDGPU/vectorize-loads.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/vectorize-loads.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}load_idx_idy:
+; GCN-NOT: global_load
+; GCN: s_load_dword [[ID_XY:s[0-9]+]], s[4:5], 0x4
+; GCN-NOT: global_load
+; GCN: s_lshr_b32 [[ID_Y:s[0-9]+]], [[ID_XY]], 16
+; GCN: s_add_i32 [[ID_SUM:s[0-9]+]], [[ID_Y]], [[ID_XY]]
+; GCN: s_and_b32 s{{[0-9]+}}, [[ID_SUM]], 0xffff
+define protected amdgpu_kernel void @load_idx_idy(i32 addrspace(1)* %out) {
+entry:
+  %disp = tail call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+  %gep_x = getelementptr i8, i8 addrspace(4)* %disp, i64 4
+  %gep_x.cast = bitcast i8 addrspace(4)* %gep_x to i16 addrspace(4)*
+  %id_x = load i16, i16 addrspace(4)* %gep_x.cast, align 4, !invariant.load !0 ; load workgroup size x
+  %gep_y = getelementptr i8, i8 addrspace(4)* %disp, i64 6
+  %gep_y.cast = bitcast i8 addrspace(4)* %gep_y to i16 addrspace(4)*
+  %id_y = load i16, i16 addrspace(4)* %gep_y.cast, align 2, !invariant.load !0 ; load workgroup size y
+  %add = add nuw nsw i16 %id_y, %id_x
+  %conv = zext i16 %add to i32
+  store i32 %conv, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+
+!0 = !{!0}
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -739,6 +739,9 @@
           AAR.addAAResult(WrapperPass->getResult());
         }));
     }
+
+    if (EnableLoadStoreVectorizer)
+      addPass(createLoadStoreVectorizerPass());
   }
 
   if (TM.getTargetTriple().getArch() == Triple::amdgcn) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D77827.256402.patch
Type: text/x-patch
Size: 1931 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200409/73d7e87c/attachment-0001.bin>


More information about the llvm-commits mailing list