[PATCH] D65600: Relax load store vectorizer pointer strip checks

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 1 12:17:30 PDT 2019


rampitec created this revision.
rampitec added reviewers: arsenm, tra, bkramer.
Herald added subscribers: nhaehnle, wdng, jvesely.

The previous change to fix a crash in the vectorizer introduced
performance regressions. The condition to preserve the pointer
address space during the search is too tight; we only need to
match the pointer size.


https://reviews.llvm.org/D65600

Files:
  lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
  test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll


Index: test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
+++ test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
@@ -1,11 +1,11 @@
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S < %s | FileCheck %s
+; RUN: opt -load-store-vectorizer -S < %s | FileCheck %s
 
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32"
+target datalayout = "e-p:64:64-p1:64:64-p5:32:32"
 
-; CHECK-LABEL: @test
+; CHECK-LABEL: @cast_to_ptr
 ; CHECK: store i32* undef, i32** %tmp9, align 8
 ; CHECK: store i32* undef, i32** %tmp7, align 8
-define amdgpu_kernel void @test() {
+define void @cast_to_ptr() {
 entry:
   %a10.ascast.i = addrspacecast i32* addrspace(5)* null to i32**
   %tmp4 = icmp eq i32 undef, 0
@@ -16,3 +16,38 @@
   store i32* undef, i32** %tmp7, align 8
   unreachable
 }
+
+; CHECK-LABEL: @cast_to_cast
+; CHECK: %tmp4 = load i32*, i32** %tmp1, align 8
+; CHECK: %tmp5 = load i32*, i32** %tmp3, align 8
+define void @cast_to_cast() {
+entry:
+  %a10.ascast.i = addrspacecast i32* addrspace(5)* undef to i32**
+  %b14.ascast.i = addrspacecast i32* addrspace(5)* null to i32**
+  %tmp1 = select i1 false, i32** %a10.ascast.i, i32** undef
+  %tmp3 = select i1 false, i32** %b14.ascast.i, i32** undef
+  %tmp4 = load i32*, i32** %tmp1, align 8
+  %tmp5 = load i32*, i32** %tmp3, align 8
+  unreachable
+}
+
+; CHECK-LABEL: @all_to_cast
+; CHECK: load <4 x float>
+define void @all_to_cast(i8* nocapture readonly align 16 dereferenceable(16) %alloc1) {
+entry:
+  %alloc16 = addrspacecast i8* %alloc1 to i8 addrspace(1)*
+  %tmp = bitcast i8 addrspace(1)* %alloc16 to float addrspace(1)*
+  %tmp1 = load float, float addrspace(1)* %tmp, align 16, !invariant.load !0
+  %tmp6 = getelementptr inbounds i8, i8 addrspace(1)* %alloc16, i64 4
+  %tmp7 = bitcast i8 addrspace(1)* %tmp6 to float addrspace(1)*
+  %tmp8 = load float, float addrspace(1)* %tmp7, align 4, !invariant.load !0
+  %tmp15 = getelementptr inbounds i8, i8 addrspace(1)* %alloc16, i64 8
+  %tmp16 = bitcast i8 addrspace(1)* %tmp15 to float addrspace(1)*
+  %tmp17 = load float, float addrspace(1)* %tmp16, align 8, !invariant.load !0
+  %tmp24 = getelementptr inbounds i8, i8 addrspace(1)* %alloc16, i64 12
+  %tmp25 = bitcast i8 addrspace(1)* %tmp24 to float addrspace(1)*
+  %tmp26 = load float, float addrspace(1)* %tmp25, align 4, !invariant.load !0
+  ret void
+}
+
+!0 = !{}
Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -339,14 +339,13 @@
                                         const APInt &PtrDelta,
                                         unsigned Depth) const {
   unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
-  unsigned PtrAS = PtrA->getType()->getPointerAddressSpace();
   APInt OffsetA(PtrBitWidth, 0);
   APInt OffsetB(PtrBitWidth, 0);
   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
 
-  if (PtrA->getType()->getPointerAddressSpace() != PtrAS ||
-      PtrB->getType()->getPointerAddressSpace() != PtrAS)
+  if (DL.getTypeStoreSizeInBits(PtrA->getType()) != PtrBitWidth ||
+      DL.getTypeStoreSizeInBits(PtrB->getType()) != PtrBitWidth)
     return false;
 
   APInt OffsetDelta = OffsetB - OffsetA;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D65600.212880.patch
Type: text/x-patch
Size: 3614 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190801/a9a00ebe/attachment.bin>


More information about the llvm-commits mailing list