[PATCH] D19532: Loop with masked store - bugfix

Elena Demikhovsky via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 26 05:08:31 PDT 2016


delena created this revision.
delena added reviewers: Ayal, hfinkel, anemet.
delena added subscribers: llvm-commits, mzolotukhin, dorit.
delena set the repository for this revision to rL LLVM.

Fixed a bug in the loop vectorizer's handling of loops that contain a conditional store.
The following loop should be easily vectorized on targets with masked store:
  unsigned int a[256], b[256];
  void foo() {
    for (int i = 0; i < 256; i++) {
      if (b[i] > a[i]) 
        a[i] = b[i];
    }
  }
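The loop in the example computes a MAX value.
In the future, this loop should produce PMAX + store. In the current version, however, the loop remains scalar because of a bug in the masked-store legality check: a masked store is considered only after the store has already failed the checks for a predicated store, instead of being tried first.
Any other conditional-store loop that does not match the MAX pattern needs this fix anyway.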


Repository:
  rL LLVM

http://reviews.llvm.org/D19532

Files:
  ../lib/Transforms/Vectorize/LoopVectorize.cpp
  ../test/Transforms/LoopVectorize/X86/max-mstore.ll

Index: ../lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- ../lib/Transforms/Vectorize/LoopVectorize.cpp
+++ ../lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4909,24 +4909,21 @@
       if (!SI)
         return false;
 
+      // Build a masked store if it is legal for the target, otherwise
+      // scalarize the block.
+      if (isLegalMaskedStore(SI->getValueOperand()->getType(),
+                             SI->getPointerOperand()) ||
+          isLegalMaskedScatter(SI->getValueOperand()->getType())) {
+        MaskedOp.insert(SI);
+        continue;
+      }
+
       bool isSafePtr = (SafePtrs.count(SI->getPointerOperand()) != 0);
       bool isSinglePredecessor = SI->getParent()->getSinglePredecessor();
 
       if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr ||
-          !isSinglePredecessor) {
-        // Build a masked store if it is legal for the target, otherwise
-        // scalarize the block.
-        bool isLegalMaskedOp =
-          isLegalMaskedStore(SI->getValueOperand()->getType(),
-                             SI->getPointerOperand()) ||
-          isLegalMaskedScatter(SI->getValueOperand()->getType());
-        if (isLegalMaskedOp) {
-          --NumPredStores;
-          MaskedOp.insert(SI);
-          continue;
-        }
+          !isSinglePredecessor)
         return false;
-      }
     }
     if (it->mayThrow())
       return false;
Index: ../test/Transforms/LoopVectorize/X86/max-mstore.ll
===================================================================
--- ../test/Transforms/LoopVectorize/X86/max-mstore.ll
+++ ../test/Transforms/LoopVectorize/X86/max-mstore.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -S -mcpu=core-avx2 | FileCheck %s
+
+; Check that a loop whose only store is guarded by a condition is vectorized
+; with a masked store when compiling for core-avx2.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [256 x i32] zeroinitializer, align 16
+@a = common global [256 x i32] zeroinitializer, align 16
+
+; unsigned int a[256], b[256];
+; void foo() {
+;   for (int i = 0; i < 256; i++) {
+;     if (b[i] > a[i])
+;       a[i] = b[i];
+;   }
+; }
+
+; CHECK-LABEL: @foo(
+; CHECK: load <8 x i32>
+; CHECK: icmp ugt <8 x i32>
+; CHECK: masked.store
+
+define void @foo() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* @b, i64 0, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [256 x i32], [256 x i32]* @a, i64 0, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %cmp3 = icmp ugt i32 %0, %1
+  br i1 %cmp3, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  store i32 %0, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D19532.54994.patch
Type: text/x-patch
Size: 3274 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160426/109dc761/attachment.bin>


More information about the llvm-commits mailing list