[llvm] r276171 - [LSV] Don't move stores across may-load instrs, and loosen restrictions on moving loads.

Justin Lebar via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 20 13:07:37 PDT 2016


Author: jlebar
Date: Wed Jul 20 15:07:37 2016
New Revision: 276171

URL: http://llvm.org/viewvc/llvm-project?rev=276171&view=rev
Log:
[LSV] Don't move stores across may-load instrs, and loosen restrictions on moving loads.

Summary:
Previously we wouldn't move loads/stores across instructions that had
side-effects, where "side-effects" was defined as may-write or may-throw.
But this is not sufficiently restrictive: stores can't safely be moved
across instructions that may load.

This patch also adds a DEBUG-mode check that the instructions in our chain
are homogeneous -- either all loads or all stores.

Reviewers: asbirlea

Subscribers: llvm-commits, jholewinski, arsenm, mzolotukhin

Differential Revision: https://reviews.llvm.org/D22547

Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
    llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp?rev=276171&r1=276170&r2=276171&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Wed Jul 20 15:07:37 2016
@@ -429,6 +429,18 @@ ArrayRef<Value *> Vectorizer::getVectori
   SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
   SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
 
+  bool IsLoadChain = isa<LoadInst>(Chain[0]);
+  DEBUG({
+    for (Value *V : Chain) {
+      if (IsLoadChain)
+        assert(isa<LoadInst>(V) &&
+               "All elements of Chain must be loads, or all must be stores.");
+      else
+        assert(isa<StoreInst>(V) &&
+               "All elements of Chain must be loads, or all must be stores.");
+    }
+  });
+
   unsigned InstrIdx = 0;
   for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
     ++InstrIdx;
@@ -437,8 +449,12 @@ ArrayRef<Value *> Vectorizer::getVectori
         MemoryInstrs.push_back({&I, InstrIdx});
       else
         ChainInstrs.push_back({&I, InstrIdx});
-    } else if (I.mayHaveSideEffects()) {
-      DEBUG(dbgs() << "LSV: Found side-effecting operation: " << I << '\n');
+    } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
+      DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
+      break;
+    } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
+      DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
+                   << '\n');
       break;
     }
   }

Modified: llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll?rev=276171&r1=276170&r2=276171&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll (original)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll Wed Jul 20 15:07:37 2016
@@ -1,48 +1,209 @@
 ; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
 
-; If we have a chain of loads or stores with a side-effecting operation in the
-; middle, we should still be able to merge the loads/stores that appear
-; before/after the side-effecting op.  We just can't merge *across* the
-; side-effecting op.
+; Check that the load/store vectorizer is willing to move loads/stores across
+; intervening instructions only if it's safe.
+;
+;  - Loads can be moved across instructions that don't write or throw.
+;  - Stores can only be moved across instructions which don't read, write, or
+;    throw.
+
+declare void @fn()
+declare void @fn_nounwind() #0
+declare void @fn_nounwind_writeonly() #1
+declare void @fn_nounwind_readonly() #2
+declare void @fn_writeonly() #3
+declare void @fn_readonly() #4
+declare void @fn_readnone() #5
 
-declare void @fn() #0
-
-; CHECK-LABEL: @merge_stores
-; CHECK: store <2 x i32> <i32 100, i32 101>
+; CHECK-LABEL: @load_fn
+; CHECK: load
 ; CHECK: call void @fn()
-; CHECK: store <2 x i32> <i32 102, i32 103>
-define void @merge_stores(i32* %out) #0 {
-  %out.gep.1 = getelementptr i32, i32* %out, i32 1
-  %out.gep.2 = getelementptr i32, i32* %out, i32 2
-  %out.gep.3 = getelementptr i32, i32* %out, i32 3
+; CHECK: load
+define void @load_fn(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
 
-  store i32 101, i32* %out.gep.1
-  store i32 100, i32* %out
+  %v0 = load i32, i32* %p
   call void @fn()
-  store i32 102, i32* %out.gep.2
-  store i32 103, i32* %out.gep.3
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_nounwind
+; CHECK: load
+; CHECK: call void @fn_nounwind()
+; CHECK: load
+define void @load_fn_nounwind(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_nounwind() #0
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_nounwind_writeonly
+; CHECK: load
+; CHECK: call void @fn_nounwind_writeonly()
+; CHECK: load
+define void @load_fn_nounwind_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_nounwind_writeonly() #1
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_nounwind_readonly
+; CHECK-DAG: load <2 x i32>
+; CHECK-DAG: call void @fn_nounwind_readonly()
+define void @load_fn_nounwind_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_nounwind_readonly() #2
+  %v1 = load i32, i32* %p.1
   ret void
 }
 
-; CHECK-LABEL: @merge_loads
-; CHECK: load <2 x i32>
+; CHECK-LABEL: @load_fn_readonly
+; CHECK: load
+; CHECK: call void @fn_readonly
+; CHECK: load
+define void @load_fn_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_readonly() #4
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_writeonly
+; CHECK: load
+; CHECK: call void @fn_writeonly()
+; CHECK: load
+define void @load_fn_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_writeonly() #3
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @load_fn_readnone
+; CHECK-DAG: load <2 x i32>
+; CHECK-DAG: call void @fn_readnone()
+define void @load_fn_readnone(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  %v0 = load i32, i32* %p
+  call void @fn_readnone() #5
+  %v1 = load i32, i32* %p.1
+  ret void
+}
+
+; ------------------------------------------------
+; Same tests, but now for stores instead of loads.
+; ------------------------------------------------
+
+; CHECK-LABEL: @store_fn
+; CHECK: store
 ; CHECK: call void @fn()
-; CHECK: load <2 x i32>
-define i32 @merge_loads(i32* %in) #0 {
-  %in.gep.1 = getelementptr i32, i32* %in, i32 1
-  %in.gep.2 = getelementptr i32, i32* %in, i32 2
-  %in.gep.3 = getelementptr i32, i32* %in, i32 3
+; CHECK: store
+define void @store_fn(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
 
-  %v1 = load i32, i32* %in
-  %v2 = load i32, i32* %in.gep.1
+  store i32 0, i32* %p
   call void @fn()
-  %v3 = load i32, i32* %in.gep.2
-  %v4 = load i32, i32* %in.gep.3
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_nounwind
+; CHECK: store
+; CHECK: call void @fn_nounwind()
+; CHECK: store
+define void @store_fn_nounwind(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_nounwind() #0
+  store i32 0, i32* %p.1
+  ret void
+}
 
-  %sum1 = add i32 %v1, %v2
-  %sum2 = add i32 %sum1, %v3
-  %sum3 = add i32 %sum2, %v4
-  ret i32 %v4
+; CHECK-LABEL: @store_fn_nounwind_writeonly
+; CHECK: store
+; CHECK: call void @fn_nounwind_writeonly()
+; CHECK: store
+define void @store_fn_nounwind_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_nounwind_writeonly() #1
+  store i32 0, i32* %p.1
+  ret void
 }
 
+; CHECK-LABEL: @store_fn_nounwind_readonly
+; CHECK: store
+; CHECK: call void @fn_nounwind_readonly()
+; CHECK: store
+define void @store_fn_nounwind_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_nounwind_readonly() #2
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_readonly
+; CHECK: store
+; CHECK: call void @fn_readonly
+; CHECK: store
+define void @store_fn_readonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_readonly() #4
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; CHECK-LABEL: @store_fn_writeonly
+; CHECK: store
+; CHECK: call void @fn_writeonly()
+; CHECK: store
+define void @store_fn_writeonly(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_writeonly() #3
+  store i32 0, i32* %p.1
+  ret void
+}
+
+; This is the only store idiom we can vectorize.
+; CHECK-LABEL: @store_fn_readnone
+; CHECK-DAG: store <2 x i32>
+; CHECK-DAG: call void @fn_readnone()
+define void @store_fn_readnone(i32* %p) #0 {
+  %p.1 = getelementptr i32, i32* %p, i32 1
+
+  store i32 0, i32* %p
+  call void @fn_readnone() #5
+  store i32 0, i32* %p.1
+  ret void
+}
+
+
 attributes #0 = { nounwind }
+attributes #1 = { nounwind writeonly }
+attributes #2 = { nounwind readonly }
+attributes #3 = { writeonly }
+attributes #4 = { readonly }
+; readnone implies nounwind, so no need to test separately
+attributes #5 = { nounwind readnone }




More information about the llvm-commits mailing list