[llvm] a8a31fd - [Scalarizer] Fix a non-deterministic scatter order problem

Mon Apr 20 07:06:35 PDT 2020

Author: Bjorn Pettersson
Date: 2020-04-20T16:05:33+02:00
New Revision: a8a31fdd80ccd5c9939ac41a8751ab3e7b8e146d

URL: https://github.com/llvm/llvm-project/commit/a8a31fdd80ccd5c9939ac41a8751ab3e7b8e146d
DIFF: https://github.com/llvm/llvm-project/commit/a8a31fdd80ccd5c9939ac41a8751ab3e7b8e146d.diff

LOG: [Scalarizer] Fix a non-deterministic scatter order problem

Summary:
The indexing operator in Scatterer may result in building new
instructions. When using multiple such operators in a function
argument list the order in which we build instructions depend on
argument evaluation order (which is undefined in C++).
This patch avoid such problems by expanding the components using
the [] operator prior to the function call.

Problem was seen when comparing output, while builing LLVM with
different compilers (clang vs gcc).

Reviewers: foad, cameron.mcinally, uabelho

Reviewed By: foad

Subscribers: hiraditya, mgrang, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78455

Added: 
    llvm/test/Transforms/Scalarizer/scatter-order.ll

Modified: 
    llvm/lib/Transforms/Scalar/Scalarizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 8404188a951b..3675e97f7d00 100644

--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -485,15 +485,17 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
 
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(&I);
-  Scatterer Op0 = scatter(&I, I.getOperand(0));
-  Scatterer Op1 = scatter(&I, I.getOperand(1));
-  assert(Op0.size() == NumElems && "Mismatched binary operation");
-  assert(Op1.size() == NumElems && "Mismatched binary operation");
+  Scatterer VOp0 = scatter(&I, I.getOperand(0));
+  Scatterer VOp1 = scatter(&I, I.getOperand(1));
+  assert(VOp0.size() == NumElems && "Mismatched binary operation");
+  assert(VOp1.size() == NumElems && "Mismatched binary operation");
   ValueVector Res;
   Res.resize(NumElems);
-  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
-    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
-                      I.getName() + ".i" + Twine(Elem));
+  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
+    Value *Op0 = VOp0[Elem];
+    Value *Op1 = VOp1[Elem];
+    Res[Elem] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Elem));
+  }
   gather(&I, Res);
   return true;
 }
@@ -576,24 +578,31 @@ bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
 
   unsigned NumElems = VT->getNumElements();
   IRBuilder<> Builder(&SI);
-  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
-  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
-  assert(Op1.size() == NumElems && "Mismatched select");
-  assert(Op2.size() == NumElems && "Mismatched select");
+  Scatterer VOp1 = scatter(&SI, SI.getOperand(1));
+  Scatterer VOp2 = scatter(&SI, SI.getOperand(2));
+  assert(VOp1.size() == NumElems && "Mismatched select");
+  assert(VOp2.size() == NumElems && "Mismatched select");
   ValueVector Res;
   Res.resize(NumElems);
 
   if (SI.getOperand(0)->getType()->isVectorTy()) {
-    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
-    assert(Op0.size() == NumElems && "Mismatched select");
-    for (unsigned I = 0; I < NumElems; ++I)
-      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
+    Scatterer VOp0 = scatter(&SI, SI.getOperand(0));
+    assert(VOp0.size() == NumElems && "Mismatched select");
+    for (unsigned I = 0; I < NumElems; ++I) {
+      Value *Op0 = VOp0[I];
+      Value *Op1 = VOp1[I];
+      Value *Op2 = VOp2[I];
+      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                     SI.getName() + ".i" + Twine(I));
+    }
   } else {
     Value *Op0 = SI.getOperand(0);
-    for (unsigned I = 0; I < NumElems; ++I)
-      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
+    for (unsigned I = 0; I < NumElems; ++I) {
+      Value *Op1 = VOp1[I];
+      Value *Op2 = VOp2[I];
+      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                     SI.getName() + ".i" + Twine(I));
+    }
   }
   gather(&SI, Res);
   return true;
@@ -822,14 +831,16 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
 
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(&SI);
-  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
-  Scatterer Val = scatter(&SI, FullValue);
+  Scatterer VPtr = scatter(&SI, SI.getPointerOperand());
+  Scatterer VVal = scatter(&SI, FullValue);
 
   ValueVector Stores;
   Stores.resize(NumElems);
   for (unsigned I = 0; I < NumElems; ++I) {
     unsigned Align = Layout.getElemAlign(I);
-    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], MaybeAlign(Align));
+    Value *Val = VVal[I];
+    Value *Ptr = VPtr[I];
+    Stores[I] = Builder.CreateAlignedStore(Val, Ptr, MaybeAlign(Align));
   }
   transferMetadataAndIRFlags(&SI, Stores);
   return true;

diff  --git a/llvm/test/Transforms/Scalarizer/scatter-order.ll b/llvm/test/Transforms/Scalarizer/scatter-order.ll
new file mode 100644
index 000000000000..df072abae53e
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/scatter-order.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s
+; RUN: opt %s -passes='function(scalarizer)' -scalarize-load-store -S | FileCheck %s
+
+; This verifies that the order of extract element instructions is
+; deterministic. In the past we could end up with 
diff erent results depending
+; on the compiler used (due to argument evaluation order being undefined in
+; C++). The order of the extracts is not really important for correctness of
+; the result, but when debugging and creating test cases it is helpful if we
+; get the same out put regardless of which compiler we use when building the
+; compiler.
+
+define <2 x i32> @test1(i1 %b, <2 x i32> %i, <2 x i32> %j) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = select i1 [[B:%.*]], i32 [[I_I0]], i32 [[J_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = select i1 [[B]], i32 [[I_I1]], i32 [[J_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[RES]]
+;
+  %res = select i1 %b, <2 x i32> %i, <2 x i32> %j
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @test2(<2 x i1> %b, <2 x i32> %i, <2 x i32> %j) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <2 x i1> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = select i1 [[B_I0]], i32 [[I_I0]], i32 [[J_I0]]
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <2 x i1> [[B]], i32 1
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = select i1 [[B_I1]], i32 [[I_I1]], i32 [[J_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[RES]]
+;
+  %res = select <2 x i1> %b, <2 x i32> %i, <2 x i32> %j
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @test3(<2 x i32> %i, <2 x i32> %j) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[RES]]
+;
+  %res = add nuw nsw <2 x i32> %i, %j
+  ret <2 x i32> %res
+}
+
+define void @test4(<2 x i32>* %ptr, <2 x i32> %val) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <2 x i32>* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[VAL_I0]], i32* [[PTR_I0]], align 8
+; CHECK-NEXT:    [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1
+; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr i32, i32* [[PTR_I0]], i32 1
+; CHECK-NEXT:    store i32 [[VAL_I1]], i32* [[PTR_I1]], align 4
+; CHECK-NEXT:    ret void
+;
+  store <2 x i32> %val, <2 x i32> *%ptr
+  ret void
+}
+