[llvm] r347759 - [SLP]Fix PR39774: Set ReductionRoot if the original instruction is vectorized.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 28 06:34:11 PST 2018


Author: abataev
Date: Wed Nov 28 06:34:11 2018
New Revision: 347759

URL: http://llvm.org/viewvc/llvm-project?rev=347759&view=rev
Log:
[SLP]Fix PR39774: Set ReductionRoot if the original instruction is vectorized.

Summary:
If the original reduction root instruction was vectorized, it might be
removed from the tree. This means that the insertion point may become
invalidated, and the whole vectorization of the reduction may produce
incorrect output.
The ReductionRoot instruction must be marked as externally used so that
it cannot be removed. Otherwise, it might cause an inconsistency with the
cost model, and we may end up with an overly optimistic optimization.

Reviewers: RKSimon, spatel, hfinkel, mkuper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D54955

Added:
    llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=347759&r1=347758&r2=347759&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Nov 28 06:34:11 2018
@@ -5453,7 +5453,7 @@ class HorizontalReduction {
     }
   };
 
-  Instruction *ReductionRoot = nullptr;
+  WeakTrackingVH ReductionRoot;
 
   /// The operation data of the reduction operation.
   OperationData ReductionData;
@@ -5738,7 +5738,7 @@ public:
     unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
 
     Value *VectorizedTree = nullptr;
-    IRBuilder<> Builder(ReductionRoot);
+    IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
     FastMathFlags Unsafe;
     Unsafe.setFast();
     Builder.setFastMathFlags(Unsafe);
@@ -5747,8 +5747,13 @@ public:
     BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
     // The same extra argument may be used several time, so log each attempt
     // to use it.
-    for (auto &Pair : ExtraArgs)
+    for (auto &Pair : ExtraArgs) {
+      assert(Pair.first && "DebugLoc must be set.");
       ExternallyUsedValues[Pair.second].push_back(Pair.first);
+    }
+    // The reduction root is used as the insertion point for new instructions,
+    // so set it as externally used to prevent it from being deleted.
+    ExternallyUsedValues[ReductionRoot];
     SmallVector<Value *, 16> IgnoreList;
     for (auto &V : ReductionOps)
       IgnoreList.append(V.begin(), V.end());
@@ -5800,6 +5805,7 @@ public:
       Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
 
       // Emit a reduction.
+      Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
       Value *ReducedSubTree =
           emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
       if (VectorizedTree) {
@@ -5826,8 +5832,6 @@ public:
         VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
       }
       for (auto &Pair : ExternallyUsedValues) {
-        assert(!Pair.second.empty() &&
-               "At least one DebugLoc must be inserted");
         // Add each externally used value to the final reduction.
         for (auto *I : Pair.second) {
           Builder.SetCurrentDebugLocation(I->getDebugLoc());

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll?rev=347759&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll Wed Nov 28 06:34:11 2018
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-7 | FileCheck %s
+
+define void @Test(i32) {
+; CHECK-LABEL: @Test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOCAL_4_39_US:%.*]] = phi i32 [ [[VAL_42:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LOCAL_8_43_US:%.*]] = phi i32 [ [[VAL_43:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[VAL_0:%.*]] = add i32 [[LOCAL_4_39_US]], 0
+; CHECK-NEXT:    [[VAL_1:%.*]] = and i32 [[LOCAL_8_43_US]], [[VAL_0]]
+; CHECK-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; CHECK-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_6:%.*]] = add i32 [[LOCAL_8_43_US]], 55
+; CHECK-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], [[VAL_6]]
+; CHECK-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_11:%.*]] = add i32 [[LOCAL_8_43_US]], 285
+; CHECK-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], [[VAL_11]]
+; CHECK-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_18:%.*]] = add i32 [[LOCAL_8_43_US]], 1240
+; CHECK-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], [[VAL_18]]
+; CHECK-NEXT:    [[VAL_20:%.*]] = add i32 [[LOCAL_8_43_US]], 1496
+; CHECK-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
+; CHECK-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_34:%.*]] = add i32 [[LOCAL_8_43_US]], 8555
+; CHECK-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
+; CHECK-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_39:%.*]] = add i32 [[LOCAL_8_43_US]], 12529
+; CHECK-NEXT:    [[VAL_40:%.*]] = and i32 [[VAL_38]], [[VAL_39]]
+; CHECK-NEXT:    [[VAL_41:%.*]] = add i32 [[LOCAL_8_43_US]], 13685
+; CHECK-NEXT:    [[VAL_42]] = and i32 [[VAL_40]], [[VAL_41]]
+; CHECK-NEXT:    [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %local_4_39.us = phi i32 [ %val_42, %loop ], [ 0, %entry ]
+  %local_8_43.us = phi i32 [ %val_43, %loop ], [ 0, %entry ]
+  %val_0 = add i32 %local_4_39.us, 0
+  %val_1 = and i32 %local_8_43.us, %val_0
+  %val_2 = and i32 %val_1, %0
+  %val_3 = and i32 %val_2, %0
+  %val_4 = and i32 %val_3, %0
+  %val_5 = and i32 %val_4, %0
+  %val_6 = add i32 %local_8_43.us, 55
+  %val_7 = and i32 %val_5, %val_6
+  %val_8 = and i32 %val_7, %0
+  %val_9 = and i32 %val_8, %0
+  %val_10 = and i32 %val_9, %0
+  %val_11 = add i32 %local_8_43.us, 285
+  %val_12 = and i32 %val_10, %val_11
+  %val_13 = and i32 %val_12, %0
+  %val_14 = and i32 %val_13, %0
+  %val_15 = and i32 %val_14, %0
+  %val_16 = and i32 %val_15, %0
+  %val_17 = and i32 %val_16, %0
+  %val_18 = add i32 %local_8_43.us, 1240
+  %val_19 = and i32 %val_17, %val_18
+  %val_20 = add i32 %local_8_43.us, 1496
+  %val_21 = and i32 %val_19, %val_20
+  %val_22 = and i32 %val_21, %0
+  %val_23 = and i32 %val_22, %0
+  %val_24 = and i32 %val_23, %0
+  %val_25 = and i32 %val_24, %0
+  %val_26 = and i32 %val_25, %0
+  %val_27 = and i32 %val_26, %0
+  %val_28 = and i32 %val_27, %0
+  %val_29 = and i32 %val_28, %0
+  %val_30 = and i32 %val_29, %0
+  %val_31 = and i32 %val_30, %0
+  %val_32 = and i32 %val_31, %0
+  %val_33 = and i32 %val_32, %0
+  %val_34 = add i32 %local_8_43.us, 8555
+  %val_35 = and i32 %val_33, %val_34
+  %val_36 = and i32 %val_35, %0
+  %val_37 = and i32 %val_36, %0
+  %val_38 = and i32 %val_37, %0
+  %val_39 = add i32 %local_8_43.us, 12529
+  %val_40 = and i32 %val_38, %val_39
+  %val_41 = add i32 %local_8_43.us, 13685
+  %val_42 = and i32 %val_40, %val_41
+  %val_43 = add i32 %local_8_43.us, 14910
+  br label %loop
+}




More information about the llvm-commits mailing list