[llvm] aa426c3 - [OMPIRBuilder] add minimalist reduction support

Alex Zinenko via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 30 04:58:34 PDT 2021


Author: Alex Zinenko
Date: 2021-07-30T13:58:26+02:00
New Revision: aa426c372c71901df64d7bb2f61f351e0b9fd048

URL: https://github.com/llvm/llvm-project/commit/aa426c372c71901df64d7bb2f61f351e0b9fd048
DIFF: https://github.com/llvm/llvm-project/commit/aa426c372c71901df64d7bb2f61f351e0b9fd048.diff

LOG: [OMPIRBuilder] add minimalist reduction support

This introduces a builder function for emitting IR performing reductions in
OpenMP. Reduction variable privatization and initialization to the
reduction-neutral value is expected to be handled separately. The caller
provides the reduction functions. Further commits can provide implementation of
reduction functions for the reduction operators defined in the OpenMP
specification.

This implementation was tested on an MLIR fork targeting OpenMP from C and
produced correct executable code.

Reviewed By: Meinersbur

Differential Revision: https://reviews.llvm.org/D104928

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 8144f1527a067..a196b54af1e1d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -486,6 +486,122 @@ class OpenMPIRBuilder {
   /// \param Loc The location where the taskyield directive was encountered.
   void createTaskyield(const LocationDescription &Loc);
 
+  /// Functions used to generate reductions. Such functions take two Values
+  /// representing LHS and RHS of the reduction, respectively, and a reference
+  /// to the value that is updated to refer to the reduction result.
+  using ReductionGenTy =
+      function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
+
+  /// Functions used to generate atomic reductions. Such functions take two
+  /// Values representing pointers to LHS and RHS of the reduction. They are
+  /// expected to atomically update the LHS to the reduced value.
+  using AtomicReductionGenTy =
+      function_ref<InsertPointTy(InsertPointTy, Value *, Value *)>;
+
+  /// Information about an OpenMP reduction.
+  struct ReductionInfo {
+    /// Returns the type of the element being reduced.
+    Type *getElementType() const {
+      return Variable->getType()->getPointerElementType();
+    }
+
+    /// Reduction variable of pointer type.
+    Value *Variable;
+
+    /// Thread-private partial reduction variable.
+    Value *PrivateVariable;
+
+    /// Callback for generating the reduction body. The IR produced by this will
+    /// be used to combine two values in a thread-safe context, e.g., under
+    /// lock or within the same thread, and therefore need not be atomic.
+    ReductionGenTy ReductionGen;
+
+    /// Callback for generating the atomic reduction body, may be null. The IR
+    /// produced by this will be used to atomically combine two values during
+    /// reduction. If null, the implementation will use the non-atomic version
+    /// along with the appropriate synchronization mechanisms.
+    AtomicReductionGenTy AtomicReductionGen;
+  };
+
+  // TODO: provide atomic and non-atomic reduction generators for reduction
+  // operators defined by the OpenMP specification.
+
+  /// Generator for '#omp reduction'.
+  ///
+  /// Emits the IR instructing the runtime to perform the specific kind of
+  /// reductions. Expects reduction variables to have been privatized and
+  /// initialized to reduction-neutral values separately. Emits the calls to
+  /// runtime functions as well as the reduction function and the basic blocks
+  /// performing the reduction atomically and non-atomically.
+  ///
+  /// The code emitted for the following:
+  ///
+  /// \code
+  ///   type var_1;
+  ///   type var_2;
+  ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
+  ///   /* body */;
+  /// \endcode
+  ///
+  /// corresponds to the following sketch.
+  ///
+  /// \code
+  /// void _outlined_par() {
+  ///   // N is the number of different reductions.
+  ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
+  ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
+  ///                        _omp_reduction_func,
+  ///                        _gomp_critical_user.reduction.var)) {
+  ///   case 1: {
+  ///     var_1 = var_1 <reduction-op> privatized_var_1;
+  ///     var_2 = var_2 <reduction-op> privatized_var_2;
+  ///     // ...
+  ///    __kmpc_end_reduce(...);
+  ///     break;
+  ///   }
+  ///   case 2: {
+  ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
+  ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
+  ///     // ...
+  ///     break;
+  ///   }
+  ///   default: break;
+  ///   }
+  /// }
+  ///
+  /// void _omp_reduction_func(void **lhs, void **rhs) {
+  ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
+  ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
+  ///   // ...
+  /// }
+  /// \endcode
+  ///
+  /// \param Loc                The location where the reduction was
+  ///                           encountered. Must be within the associated
+  ///                           directive and after the last local access to the
+  ///                           reduction variables.
+  /// \param AllocaIP           An insertion point suitable for allocas usable
+  ///                           in reductions.
+  /// \param ReductionInfos     A list with one entry per reduction. Each entry
+  ///                           provides: Variable, in which the reduction
+  ///                           result will be stored; PrivateVariable, in which
+  ///                           the partial reduction result is stored (both are
+  ///                           values of pointer type; privatization must be
+  ///                           handled separately from this call);
+  ///                           ReductionGen, the generator for the non-atomic
+  ///                           reduction body, which takes a pair of partially
+  ///                           reduced values and sets a new one; and
+  ///                           AtomicReductionGen, the generator for the atomic
+  ///                           reduction body, null if the reduction cannot be
+  ///                           performed with atomics, which takes a pair of
+  ///                           _pointers_ to partially reduced values and
+  ///                           atomically stores the result into the first.
+  /// \param IsNoWait           A flag set if the reduction is marked as nowait.
+  InsertPointTy createReductions(const LocationDescription &Loc,
+                                 InsertPointTy AllocaIP,
+                                 ArrayRef<ReductionInfo> ReductionInfos,
+                                 bool IsNoWait = false);
+
   ///}
 
   /// Return the insertion point used by the underlying IRBuilder.

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index eb673b199fc41..6c07a58906568 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -920,6 +920,7 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
   OMP_IDENT_FLAG(OMP_IDENT_FLAG_##Name, #Name, Value)
 
 __OMP_IDENT_FLAG(KMPC, 0x02)
+__OMP_IDENT_FLAG(ATOMIC_REDUCE, 0x10)
 __OMP_IDENT_FLAG(BARRIER_EXPL, 0x20)
 __OMP_IDENT_FLAG(BARRIER_IMPL, 0x0040)
 __OMP_IDENT_FLAG(BARRIER_IMPL_MASK, 0x01C0)

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 76954f9a37e18..73da65638ceb0 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1022,6 +1022,179 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc,
                               /*IsCancellable*/ true);
 }
 
+/// Create a new internal-linkage function with a "void (i8*, i8*)" signature,
+/// requested as ".omp.reduction.func", in module M and return it. File-local.
+static Function *getFreshReductionFunc(Module &M) {
+  Type *VoidTy = Type::getVoidTy(M.getContext());
+  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+  auto *FuncTy =
+      FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
+  return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
+                          M.getDataLayout().getDefaultGlobalsAddressSpace(),
+                          ".omp.reduction.func", &M);
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
+    const LocationDescription &Loc, InsertPointTy AllocaIP,
+    ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
+  for (const ReductionInfo &RI : ReductionInfos) {
+    (void)RI;
+    assert(RI.Variable && "expected non-null variable");
+    assert(RI.PrivateVariable && "expected non-null private variable");
+    assert(RI.ReductionGen && "expected non-null reduction generator callback");
+    assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
+           "expected variables and their private equivalents to have the same "
+           "type");
+    assert(RI.Variable->getType()->isPointerTy() &&
+           "expected variables to be pointers");
+  }
+
+  if (!updateToLocation(Loc))
+    return InsertPointTy();
+
+  BasicBlock *InsertBlock = Loc.IP.getBlock();
+  BasicBlock *ContinuationBlock =
+      InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
+  InsertBlock->getTerminator()->eraseFromParent();
+
+  // Create and populate array of type-erased pointers to private reduction
+  // values.
+  unsigned NumReductions = ReductionInfos.size();
+  Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
+  Builder.restoreIP(AllocaIP);
+  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
+
+  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
+
+  for (auto En : enumerate(ReductionInfos)) {
+    unsigned Index = En.index();
+    const ReductionInfo &RI = En.value();
+    Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
+        RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
+    Value *Casted =
+        Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
+                              "private.red.var." + Twine(Index) + ".casted");
+    Builder.CreateStore(Casted, RedArrayElemPtr);
+  }
+
+  // Emit a call to the runtime function that orchestrates the reduction.
+  // Declare the reduction function in the process.
+  Function *Func = Builder.GetInsertBlock()->getParent();
+  Module *Module = Func->getParent();
+  Value *RedArrayPtr =
+      Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  bool CanGenerateAtomic =
+      llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
+        return RI.AtomicReductionGen;
+      });
+  Value *Ident = getOrCreateIdent(
+      SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
+                                   : IdentFlag(0));
+  Value *ThreadId = getOrCreateThreadID(Ident);
+  Constant *NumVariables = Builder.getInt32(NumReductions);
+  const DataLayout &DL = Module->getDataLayout();
+  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
+  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
+  Function *ReductionFunc = getFreshReductionFunc(*Module);
+  Value *Lock = getOMPCriticalRegionLock(".reduction");
+  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
+      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
+               : RuntimeFunction::OMPRTL___kmpc_reduce);
+  CallInst *ReduceCall =
+      Builder.CreateCall(ReduceFunc,
+                         {Ident, ThreadId, NumVariables, RedArraySize,
+                          RedArrayPtr, ReductionFunc, Lock},
+                         "reduce");
+
+  // Create final reduction entry blocks for the atomic and non-atomic case.
+  // Emit IR that dispatches control flow to one of the blocks based on the
+  // reduction supporting the atomic mode.
+  BasicBlock *NonAtomicRedBlock =
+      BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
+  BasicBlock *AtomicRedBlock =
+      BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
+  SwitchInst *Switch =
+      Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
+  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
+  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
+
+  // Populate the non-atomic reduction using the elementwise reduction function.
+  // This loads the elements from the global and private variables and reduces
+  // them before storing back the result to the global variable.
+  Builder.SetInsertPoint(NonAtomicRedBlock);
+  for (auto En : enumerate(ReductionInfos)) {
+    const ReductionInfo &RI = En.value();
+    Type *ValueType = RI.getElementType();
+    Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
+                                         "red.value." + Twine(En.index()));
+    Value *PrivateRedValue =
+        Builder.CreateLoad(ValueType, RI.PrivateVariable,
+                           "red.private.value." + Twine(En.index()));
+    Value *Reduced;
+    Builder.restoreIP(
+        RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
+    if (!Builder.GetInsertBlock())
+      return InsertPointTy();
+    Builder.CreateStore(Reduced, RI.Variable);
+  }
+  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
+      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
+               : RuntimeFunction::OMPRTL___kmpc_end_reduce);
+  Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
+  Builder.CreateBr(ContinuationBlock);
+
+  // Populate the atomic reduction using the atomic elementwise reduction
+  // function. There are no loads/stores here because they will be happening
+  // inside the atomic elementwise reduction.
+  Builder.SetInsertPoint(AtomicRedBlock);
+  if (CanGenerateAtomic) {
+    for (const ReductionInfo &RI : ReductionInfos) {
+      Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable,
+                                              RI.PrivateVariable));
+      if (!Builder.GetInsertBlock())
+        return InsertPointTy();
+    }
+    Builder.CreateBr(ContinuationBlock);
+  } else {
+    Builder.CreateUnreachable();
+  }
+
+  // Populate the outlined reduction function using the elementwise reduction
+  // function. Partial values are extracted from the type-erased array of
+  // pointers to private variables.
+  BasicBlock *ReductionFuncBlock =
+      BasicBlock::Create(Module->getContext(), "", ReductionFunc);
+  Builder.SetInsertPoint(ReductionFuncBlock);
+  Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
+                                             RedArrayTy->getPointerTo());
+  Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
+                                             RedArrayTy->getPointerTo());
+  for (auto En : enumerate(ReductionInfos)) {
+    const ReductionInfo &RI = En.value();
+    Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
+        RedArrayTy, LHSArrayPtr, 0, En.index());
+    Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
+    Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
+    Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr);
+    Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
+        RedArrayTy, RHSArrayPtr, 0, En.index());
+    Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
+    Value *RHSPtr =
+        Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
+    Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr);
+    Value *Reduced;
+    Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
+    if (!Builder.GetInsertBlock())
+      return InsertPointTy();
+    Builder.CreateStore(Reduced, LHSPtr);
+  }
+  Builder.CreateRetVoid();
+
+  Builder.SetInsertPoint(ContinuationBlock);
+  return Builder.saveIP();
+}
+
 OpenMPIRBuilder::InsertPointTy
 OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 50887611eaf17..dbb5e8e4d440e 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -154,14 +154,14 @@ class OpenMPIRBuilderTestWithParams
       public ::testing::WithParamInterface<omp::OMPScheduleType> {};
 
 // Returns the value stored in the given allocation. Returns null if the given
-// value is not a result of an allocation, if no value is stored or if there is
-// more than one store.
-static Value *findStoredValue(Value *AllocaValue) {
-  Instruction *Alloca = dyn_cast<AllocaInst>(AllocaValue);
-  if (!Alloca)
+// value is not a result of an InstTy instruction, if no value is stored or if
+// there is more than one store.
+template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
+  Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
+  if (!Inst)
     return nullptr;
   StoreInst *Store = nullptr;
-  for (Use &U : Alloca->uses()) {
+  for (Use &U : Inst->uses()) {
     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
       EXPECT_EQ(Store, nullptr);
       Store = CandidateStore;
@@ -545,7 +545,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
   EXPECT_EQ(ForkCI->getArgOperand(1),
             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
   EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
-  EXPECT_EQ(findStoredValue(ForkCI->getArgOperand(3)), F->arg_begin());
+  EXPECT_EQ(findStoredValue<AllocaInst>(ForkCI->getArgOperand(3)),
+            F->arg_begin());
 }
 
 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
@@ -860,14 +861,15 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
   EXPECT_EQ(ForkCI->getArgOperand(1),
             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
-  Value *StoredForkArg = findStoredValue(ForkCI->getArgOperand(3));
+  Value *StoredForkArg = findStoredValue<AllocaInst>(ForkCI->getArgOperand(3));
   EXPECT_EQ(StoredForkArg, F->arg_begin());
 
   EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
   EXPECT_EQ(DirectCI->getNumArgOperands(), 3U);
   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
-  Value *StoredDirectArg = findStoredValue(DirectCI->getArgOperand(2));
+  Value *StoredDirectArg =
+      findStoredValue<AllocaInst>(DirectCI->getArgOperand(2));
   EXPECT_EQ(StoredDirectArg, F->arg_begin());
 }
 
@@ -2517,6 +2519,559 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
   EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
+/// Returns the single instruction of InstTy type in BB that uses the value V.
+/// If there is no such instruction or more than one, returns null.
+template <typename InstTy>
+static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
+  InstTy *Result = nullptr;
+  for (User *U : V->users()) {
+    auto *Inst = dyn_cast<InstTy>(U);
+    if (!Inst || Inst->getParent() != BB)
+      continue;
+    if (Result)
+      return nullptr;
+    Result = Inst;
+  }
+  return Result;
+}
+
+/// Returns true if BB contains a simple binary reduction that loads a value
+/// from Accum, performs some binary operation with it, and stores it back to
+/// Accum.
+static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
+                                    Instruction::BinaryOps *OpCode = nullptr) {
+  StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
+  if (!Store)
+    return false;
+  auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
+  if (!Stored)
+    return false;
+  if (OpCode && *OpCode != Stored->getOpcode())
+    return false;
+  auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
+  return Load && Load->getOperand(0) == Accum;
+}
+
+/// Returns true if BB contains a binary reduction that reduces V using a binary
+/// operator into an accumulator that is a function argument.
+static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
+  auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
+  if (!ReductionOp)
+    return false;
+
+  auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
+  if (!GlobalLoad)
+    return false;
+
+  auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
+  if (!Store)
+    return false;
+
+  return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
+         isa<Argument>(GlobalLoad->getPointerOperand());
+}
+
+/// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
+/// [0, 1], respectively, and assigns results of these instructions to Zero and
+/// One. Returns true on success, false on failure or if such instructions are
+/// not unique among the users of Ptr.
+static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
+  Zero = nullptr;
+  One = nullptr;
+  for (User *U : Ptr->users()) {
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      if (GEP->getNumIndices() != 2)
+        continue;
+      auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+      auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
+      EXPECT_NE(FirstIdx, nullptr);
+      EXPECT_NE(SecondIdx, nullptr);
+
+      EXPECT_TRUE(FirstIdx->isZero());
+      if (SecondIdx->isZero()) {
+        if (Zero)
+          return false;
+        Zero = GEP;
+      } else if (SecondIdx->isOne()) {
+        if (One)
+          return false;
+        One = GEP;
+      } else {
+        return false;
+      }
+    }
+  }
+  return Zero != nullptr && One != nullptr;
+}
+
+static OpenMPIRBuilder::InsertPointTy
+sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
+             Value *&Result) {
+  IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
+  Result = Builder.CreateFAdd(LHS, RHS, "red.add");
+  return Builder.saveIP();
+}
+
+static OpenMPIRBuilder::InsertPointTy
+sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS) {
+  IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
+  Value *Partial = Builder.CreateLoad(RHS->getType()->getPointerElementType(),
+                                      RHS, "red.partial");
+  Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, None,
+                          AtomicOrdering::Monotonic);
+  return Builder.saveIP();
+}
+
+static OpenMPIRBuilder::InsertPointTy
+xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
+             Value *&Result) {
+  IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
+  Result = Builder.CreateXor(LHS, RHS, "red.xor");
+  return Builder.saveIP();
+}
+
+static OpenMPIRBuilder::InsertPointTy
+xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS) {
+  IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
+  Value *Partial = Builder.CreateLoad(RHS->getType()->getPointerElementType(),
+                                      RHS, "red.partial");
+  Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, None,
+                          AtomicOrdering::Monotonic);
+  return Builder.saveIP();
+}
+
+/// Populate Calls with call instructions calling the function with the given
+/// FnID from the given function F.
+static void findCalls(Function *F, omp::RuntimeFunction FnID,
+                      OpenMPIRBuilder &OMPBuilder,
+                      SmallVectorImpl<CallInst *> &Calls) {
+  Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
+  for (BasicBlock &BB : *F) {
+    for (Instruction &I : BB) {
+      auto *Call = dyn_cast<CallInst>(&I);
+      if (Call && Call->getCalledFunction() == Fn)
+        Calls.push_back(Call);
+    }
+  }
+}
+
+TEST_F(OpenMPIRBuilderTest, CreateReductions) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  // Create variables to be reduced.
+  InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
+                              F->getEntryBlock().getFirstInsertionPt());
+  Value *SumReduced;
+  Value *XorReduced;
+  {
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(OuterAllocaIP);
+    SumReduced = Builder.CreateAlloca(Builder.getFloatTy());
+    XorReduced = Builder.CreateAlloca(Builder.getInt32Ty());
+  }
+
+  // Store initial values of reductions into global variables.
+  Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
+  Builder.CreateStore(Builder.getInt32(1), XorReduced);
+
+  // The loop body computes two reductions:
+  //   sum of (float) thread-id;
+  //   xor of thread-id;
+  // and store the result in global variables.
+  InsertPointTy BodyIP, BodyAllocaIP;
+  auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &ContinuationBB) {
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(CodeGenIP);
+
+    Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc);
+    Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr);
+    Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
+    Value *SumLocal =
+        Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
+    Value *SumPartial =
+        Builder.CreateLoad(SumReduced->getType()->getPointerElementType(),
+                           SumReduced, "sum.partial");
+    Value *XorPartial =
+        Builder.CreateLoad(XorReduced->getType()->getPointerElementType(),
+                           XorReduced, "xor.partial");
+    Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
+    Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
+    Builder.CreateStore(Sum, SumReduced);
+    Builder.CreateStore(Xor, XorReduced);
+
+    BodyIP = Builder.saveIP();
+    BodyAllocaIP = InnerAllocaIP;
+  };
+
+  // Privatization for reduction creates local copies of reduction variables and
+  // initializes them to reduction-neutral values.
+  Value *SumPrivatized;
+  Value *XorPrivatized;
+  auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
+                    Value &Original, Value &Inner, Value *&ReplVal) {
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(InnerAllocaIP);
+    if (&Original == SumReduced) {
+      SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
+      ReplVal = SumPrivatized;
+    } else if (&Original == XorReduced) {
+      XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
+      ReplVal = XorPrivatized;
+    } else {
+      ReplVal = &Inner;
+      return CodeGenIP;
+    }
+
+    Builder.restoreIP(CodeGenIP);
+    if (&Original == SumReduced)
+      Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
+                          SumPrivatized);
+    else if (&Original == XorReduced)
+      Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
+
+    return Builder.saveIP();
+  };
+
+  // Do nothing in finalization.
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
+
+  InsertPointTy AfterIP =
+      OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                /* IfCondition */ nullptr,
+                                /* NumThreads */ nullptr, OMP_PROC_BIND_default,
+                                /* IsCancellable */ false);
+  Builder.restoreIP(AfterIP);
+
+  OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
+      {SumReduced, SumPrivatized, sumReduction, sumAtomicReduction},
+      {XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}};
+
+  OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  OMPBuilder.finalize(F);
+
+  // The IR must be valid.
+  EXPECT_FALSE(verifyModule(*M));
+
+  // Outlining must have happened.
+  SmallVector<CallInst *> ForkCalls;
+  findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
+            ForkCalls);
+  ASSERT_EQ(ForkCalls.size(), 1u);
+  Value *CalleeVal = cast<Constant>(ForkCalls[0]->getOperand(2))->getOperand(0);
+  Function *Outlined = dyn_cast<Function>(CalleeVal);
+  EXPECT_NE(Outlined, nullptr);
+
+  // Check that the lock variable was created with the expected name.
+  GlobalVariable *LockVar =
+      M->getGlobalVariable(".gomp_critical_user_.reduction.var");
+  EXPECT_NE(LockVar, nullptr);
+
+  // Find the allocation of a local array that will be used to call the runtime
+  // reduction function.
+  BasicBlock &AllocBlock = Outlined->getEntryBlock();
+  Value *LocalArray = nullptr;
+  for (Instruction &I : AllocBlock) {
+    if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
+      if (!Alloc->getAllocatedType()->isArrayTy() ||
+          !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
+        continue;
+      LocalArray = Alloc;
+      break;
+    }
+  }
+  ASSERT_NE(LocalArray, nullptr);
+
+  // Find the call to the runtime reduction function.
+  BasicBlock *BB = AllocBlock.getUniqueSuccessor();
+  Value *LocalArrayPtr = nullptr;
+  Value *ReductionFnVal = nullptr;
+  Value *SwitchArg = nullptr;
+  for (Instruction &I : *BB) {
+    if (CallInst *Call = dyn_cast<CallInst>(&I)) {
+      if (Call->getCalledFunction() !=
+          OMPBuilder.getOrCreateRuntimeFunctionPtr(
+              RuntimeFunction::OMPRTL___kmpc_reduce))
+        continue;
+      LocalArrayPtr = Call->getOperand(4);
+      ReductionFnVal = Call->getOperand(5);
+      SwitchArg = Call;
+      break;
+    }
+  }
+
+  // Check that the local array is passed to the function.
+  ASSERT_NE(LocalArrayPtr, nullptr);
+  BitCastInst *BitCast = dyn_cast<BitCastInst>(LocalArrayPtr);
+  ASSERT_NE(BitCast, nullptr);
+  EXPECT_EQ(BitCast->getOperand(0), LocalArray);
+
+  // Find the GEP instructions preceding stores to the local array.
+  Value *FirstArrayElemPtr = nullptr;
+  Value *SecondArrayElemPtr = nullptr;
+  EXPECT_EQ(LocalArray->getNumUses(), 3u);
+  ASSERT_TRUE(
+      findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
+
+  // Check that the values stored into the local array are privatized reduction
+  // variables.
+  auto *FirstStored = dyn_cast_or_null<BitCastInst>(
+      findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
+  auto *SecondStored = dyn_cast_or_null<BitCastInst>(
+      findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
+  ASSERT_NE(FirstStored, nullptr);
+  ASSERT_NE(SecondStored, nullptr);
+  Value *FirstPrivatized = FirstStored->getOperand(0);
+  Value *SecondPrivatized = SecondStored->getOperand(0);
+  EXPECT_TRUE(
+      isSimpleBinaryReduction(FirstPrivatized, FirstStored->getParent()));
+  EXPECT_TRUE(
+      isSimpleBinaryReduction(SecondPrivatized, SecondStored->getParent()));
+
+  // Check that the result of the runtime reduction call is used for further
+  // dispatch.
+  ASSERT_EQ(SwitchArg->getNumUses(), 1u);
+  SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
+  ASSERT_NE(Switch, nullptr);
+  EXPECT_EQ(Switch->getNumSuccessors(), 3u);
+  BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
+  BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
+
+  // Non-atomic block contains reductions to the global reduction variable,
+  // which is passed into the outlined function as an argument.
+  Value *FirstLoad =
+      findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
+  Value *SecondLoad =
+      findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
+  EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
+  EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
+
+  // Atomic block also contains reductions to the global reduction variable.
+  FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
+  SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
+  auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
+  auto *SecondAtomic =
+      findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
+  ASSERT_NE(FirstAtomic, nullptr);
+  EXPECT_TRUE(isa<Argument>(FirstAtomic->getPointerOperand()));
+  ASSERT_NE(SecondAtomic, nullptr);
+  EXPECT_TRUE(isa<Argument>(SecondAtomic->getPointerOperand()));
+
+  // Check that the separate reduction function also performs (non-atomic)
+  // reductions after extracting reduction variables from its arguments.
+  Function *ReductionFn = cast<Function>(ReductionFnVal);
+  BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
+  auto *Bitcast =
+      findSingleUserInBlock<BitCastInst>(ReductionFn->getArg(0), FnReductionBB);
+  Value *FirstLHSPtr;
+  Value *SecondLHSPtr;
+  ASSERT_TRUE(findGEPZeroOne(Bitcast, FirstLHSPtr, SecondLHSPtr));
+  Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
+  ASSERT_NE(Opaque, nullptr);
+  Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
+  ASSERT_NE(Bitcast, nullptr);
+  EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB));
+  Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
+  ASSERT_NE(Opaque, nullptr);
+  Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
+  ASSERT_NE(Bitcast, nullptr);
+  EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB));
+
+  Bitcast =
+      findSingleUserInBlock<BitCastInst>(ReductionFn->getArg(1), FnReductionBB);
+  Value *FirstRHS;
+  Value *SecondRHS;
+  EXPECT_TRUE(findGEPZeroOne(Bitcast, FirstRHS, SecondRHS));
+}
+
+// Builds two parallel regions, one accumulating a floating-point sum and one
+// accumulating an integer xor, attaches a separate reduction to each, and then
+// checks that (a) the module verifies, (b) two distinct outlined functions are
+// forked, (c) each outlined function calls __kmpc_reduce with its own
+// reduction function, and (d) each reduction function performs the expected
+// binary operation (FAdd and Xor, respectively).
+TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  // Create variables to be reduced. The element types are kept in locals so
+  // that loads below can name them explicitly instead of recovering them from
+  // the pointer type.
+  InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
+                              F->getEntryBlock().getFirstInsertionPt());
+  Type *SumType = Builder.getFloatTy();
+  Type *XorType = Builder.getInt32Ty();
+  Value *SumReduced = nullptr;
+  Value *XorReduced = nullptr;
+  {
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(OuterAllocaIP);
+    SumReduced = Builder.CreateAlloca(SumType);
+    XorReduced = Builder.CreateAlloca(XorType);
+  }
+
+  // Store initial values of reductions into global variables.
+  Builder.CreateStore(ConstantFP::get(SumType, 0.0), SumReduced);
+  Builder.CreateStore(Builder.getInt32(1), XorReduced);
+
+  // Body of the first parallel region: sum += (float)thread_id. The insertion
+  // points are saved so that the reduction can be emitted there afterwards.
+  InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
+  auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
+                            InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationBB) {
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(CodeGenIP);
+
+    Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc);
+    Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr);
+    Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
+    Value *SumLocal = Builder.CreateUIToFP(TID, SumType, "sum.local");
+    Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
+    Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
+    Builder.CreateStore(Sum, SumReduced);
+
+    FirstBodyIP = Builder.saveIP();
+    FirstBodyAllocaIP = InnerAllocaIP;
+  };
+
+  // Body of the second parallel region: xor ^= thread_id.
+  InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
+  auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
+                             InsertPointTy CodeGenIP,
+                             BasicBlock &ContinuationBB) {
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(CodeGenIP);
+
+    Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc);
+    Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr);
+    Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
+    Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
+    Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
+    Builder.CreateStore(Xor, XorReduced);
+
+    SecondBodyIP = Builder.saveIP();
+    SecondBodyAllocaIP = InnerAllocaIP;
+  };
+
+  // Privatization for reduction creates local copies of reduction variables and
+  // initializes them to reduction-neutral values. The same privatization
+  // callback is used for both loops, with dispatch based on the value being
+  // privatized. The pointers start out null so that a callback that never
+  // fired is detected below instead of passing garbage to createReductions.
+  Value *SumPrivatized = nullptr;
+  Value *XorPrivatized = nullptr;
+  auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
+                    Value &Original, Value &Inner, Value *&ReplVal) {
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(InnerAllocaIP);
+    if (&Original == SumReduced) {
+      SumPrivatized = Builder.CreateAlloca(SumType);
+      ReplVal = SumPrivatized;
+    } else if (&Original == XorReduced) {
+      XorPrivatized = Builder.CreateAlloca(XorType);
+      ReplVal = XorPrivatized;
+    } else {
+      // Not a reduction variable: keep using the value as seen inside the
+      // region and emit no extra code.
+      ReplVal = &Inner;
+      return CodeGenIP;
+    }
+
+    // Initialize the private copy to the neutral element of its reduction.
+    Builder.restoreIP(CodeGenIP);
+    if (&Original == SumReduced)
+      Builder.CreateStore(ConstantFP::get(SumType, 0.0), SumPrivatized);
+    else if (&Original == XorReduced)
+      Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
+
+    return Builder.saveIP();
+  };
+
+  // Do nothing in finalization.
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
+
+  Builder.restoreIP(
+      OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
+                                FiniCB, /* IfCondition */ nullptr,
+                                /* NumThreads */ nullptr, OMP_PROC_BIND_default,
+                                /* IsCancellable */ false));
+  InsertPointTy AfterIP = OMPBuilder.createParallel(
+      {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB,
+      /* IfCondition */ nullptr,
+      /* NumThreads */ nullptr, OMP_PROC_BIND_default,
+      /* IsCancellable */ false);
+
+  // Both reduction variables must have been privatized by now; the reductions
+  // below consume the private copies.
+  ASSERT_NE(SumPrivatized, nullptr);
+  ASSERT_NE(XorPrivatized, nullptr);
+
+  OMPBuilder.createReductions(
+      FirstBodyIP, FirstBodyAllocaIP,
+      {{SumReduced, SumPrivatized, sumReduction, sumAtomicReduction}});
+  OMPBuilder.createReductions(
+      SecondBodyIP, SecondBodyAllocaIP,
+      {{XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}});
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  OMPBuilder.finalize(F);
+
+  // The IR must be valid.
+  EXPECT_FALSE(verifyModule(*M));
+
+  // Two different outlined functions must have been created.
+  SmallVector<CallInst *> ForkCalls;
+  findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
+            ForkCalls);
+  ASSERT_EQ(ForkCalls.size(), 2u);
+  // Operand 2 of each fork call is a constant whose first operand is the
+  // outlined function (presumably a cast constant expression).
+  Value *CalleeVal = cast<Constant>(ForkCalls[0]->getOperand(2))->getOperand(0);
+  Function *FirstCallee = cast<Function>(CalleeVal);
+  CalleeVal = cast<Constant>(ForkCalls[1]->getOperand(2))->getOperand(0);
+  Function *SecondCallee = cast<Function>(CalleeVal);
+  EXPECT_NE(FirstCallee, SecondCallee);
+
+  // Two different reduction functions must have been created.
+  SmallVector<CallInst *> ReduceCalls;
+  findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
+            ReduceCalls);
+  ASSERT_EQ(ReduceCalls.size(), 1u);
+  auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
+  ReduceCalls.clear();
+  findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
+            OMPBuilder, ReduceCalls);
+  // Guard the indexing below, mirroring the check done for the first callee.
+  ASSERT_EQ(ReduceCalls.size(), 1u);
+  auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
+  EXPECT_NE(AddReduction, XorReduction);
+
+  // Each reduction function does its own kind of reduction. Starting from the
+  // first argument, follow the bitcast -> GEP -> load -> bitcast chain in the
+  // entry block and check the binary operation applied to the result.
+  BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
+  auto *Bitcast = findSingleUserInBlock<BitCastInst>(AddReduction->getArg(0),
+                                                     FnReductionBB);
+  ASSERT_NE(Bitcast, nullptr);
+  Value *FirstLHSPtr =
+      findSingleUserInBlock<GetElementPtrInst>(Bitcast, FnReductionBB);
+  ASSERT_NE(FirstLHSPtr, nullptr);
+  Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
+  ASSERT_NE(Opaque, nullptr);
+  Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
+  ASSERT_NE(Bitcast, nullptr);
+  Instruction::BinaryOps Opcode = Instruction::FAdd;
+  EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB, &Opcode));
+
+  FnReductionBB = &XorReduction->getEntryBlock();
+  Bitcast = findSingleUserInBlock<BitCastInst>(XorReduction->getArg(0),
+                                               FnReductionBB);
+  ASSERT_NE(Bitcast, nullptr);
+  Value *SecondLHSPtr =
+      findSingleUserInBlock<GetElementPtrInst>(Bitcast, FnReductionBB);
+  // Check the pointer that was just computed for the xor reduction function.
+  ASSERT_NE(SecondLHSPtr, nullptr);
+  Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
+  ASSERT_NE(Opaque, nullptr);
+  Bitcast = findSingleUserInBlock<BitCastInst>(Opaque, FnReductionBB);
+  ASSERT_NE(Bitcast, nullptr);
+  Opcode = Instruction::Xor;
+  EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB, &Opcode));
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateSections) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;


        


More information about the llvm-commits mailing list