[llvm] 8931add - [OpenMPOpt][HideMemTransfersLatency] Get values stored in offload arrays
Hamilton Tobon Mosquera via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 31 13:33:27 PDT 2020
Author: Hamilton Tobon Mosquera
Date: 2020-08-31T15:33:05-05:00
New Revision: 8931add6170508704007f1a410993e6aec879c01
URL: https://github.com/llvm/llvm-project/commit/8931add6170508704007f1a410993e6aec879c01
DIFF: https://github.com/llvm/llvm-project/commit/8931add6170508704007f1a410993e6aec879c01.diff
LOG: [OpenMPOpt][HideMemTransfersLatency] Get values stored in offload arrays
getValuesInOffloadArrays walks the offload arrays passed to __tgt_target_data_begin_mapper and collects the values stored in them before the call is issued.
call void @__tgt_target_data_begin_mapper(arg0, arg1,
i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
...)
Differential Revision: https://reviews.llvm.org/D86300
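As an illustration of how the new helper is meant to be used, here is a minimal, hypothetical sketch (not part of this patch): it assumes the OffloadArray struct added below in OpenMPOpt.cpp is in scope and that RTCall is the __tgt_target_data_begin_mapper CallInst; the function name inspectBaseptrs is made up for the example.
#include "llvm/Analysis/ValueTracking.h" // getUnderlyingObject
#include "llvm/IR/Instructions.h"        // AllocaInst, CallInst
#include "llvm/Support/Debug.h"          // dbgs()
using namespace llvm;
// Hypothetical usage sketch, assuming the OffloadArray struct from this patch is visible.
static bool inspectBaseptrs(CallInst &RTCall) {
  // Argument 2 of __tgt_target_data_begin_mapper is i8** %offload_baseptrs.
  Value *BasePtrsArg = RTCall.getArgOperand(2);
  // Strip casts/GEPs to reach the alloca that backs the offload array.
  auto *BasePtrsArray = dyn_cast<AllocaInst>(getUnderlyingObject(BasePtrsArg));
  if (!BasePtrsArray)
    return false;
  // Gather the values stored into the array before the runtime call is reached.
  OffloadArray OA;
  if (!OA.initialize(*BasePtrsArray, RTCall))
    return false;
  // One entry per mapped variable; print them for inspection.
  for (Value *V : OA.StoredValues)
    V->print(dbgs());
  return true;
}
getValuesInOffloadArrays below does the same for all three arrays (baseptrs, ptrs and sizes) at once.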
Added:
llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
Modified:
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index d596508276374..bd4e2f769af09 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -26,6 +26,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;
using namespace omp;
@@ -379,6 +380,87 @@ struct OMPInformationCache : public InformationCache {
SmallPtrSetImpl<Kernel> &Kernels;
};
+/// Used to map the values physically (in the IR) stored in an offload
+/// array to a vector in memory.
+struct OffloadArray {
+ /// Physical array (in the IR).
+ AllocaInst *Array = nullptr;
+ /// Mapped values.
+ SmallVector<Value *, 8> StoredValues;
+ /// Last stores made in the offload array.
+ SmallVector<StoreInst *, 8> LastAccesses;
+
+ OffloadArray() = default;
+
+ /// Initializes the OffloadArray with the values stored in \p Array before
+ /// instruction \p Before is reached. Returns false if the initialization
+ /// fails.
+ /// This MUST be used immediately after the construction of the object.
+ bool initialize(AllocaInst &Array, Instruction &Before) {
+ if (!Array.getAllocatedType()->isArrayTy())
+ return false;
+
+ if (!getValues(Array, Before))
+ return false;
+
+ this->Array = &Array;
+ return true;
+ }
+
+private:
+ /// Traverses the BasicBlock where \p Array is, collecting the stores made to
+ /// \p Array, leaving StoredValues with the values stored before the
+ /// instruction \p Before is reached.
+ bool getValues(AllocaInst &Array, Instruction &Before) {
+ // Initialize container.
+ const uint64_t NumValues =
+ Array.getAllocatedType()->getArrayNumElements();
+ StoredValues.assign(NumValues, nullptr);
+ LastAccesses.assign(NumValues, nullptr);
+
+ // TODO: This assumes the instruction \p Before is in the same
+ // BasicBlock as Array. Make it general, for any control flow graph.
+ BasicBlock *BB = Array.getParent();
+ if (BB != Before.getParent())
+ return false;
+
+ const DataLayout &DL = Array.getModule()->getDataLayout();
+ const unsigned int PointerSize = DL.getPointerSize();
+
+ for (Instruction &I : *BB) {
+ if (&I == &Before)
+ break;
+
+ if (!isa<StoreInst>(&I))
+ continue;
+
+ auto *S = cast<StoreInst>(&I);
+ int64_t Offset = -1;
+ auto *Dst = GetPointerBaseWithConstantOffset(S->getPointerOperand(),
+ Offset, DL);
+ if (Dst == &Array) {
+ int64_t Idx = Offset / PointerSize;
+ StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
+ LastAccesses[Idx] = S;
+ }
+ }
+
+ return isFilled();
+ }
+
+ /// Returns true if all values in StoredValues and
+ /// LastAccesses are not nullptrs.
+ bool isFilled() {
+ const unsigned NumValues = StoredValues.size();
+ for (unsigned I = 0; I < NumValues; ++I) {
+ if (!StoredValues[I] || !LastAccesses[I])
+ return false;
+ }
+
+ return true;
+ }
+};
+
struct OpenMPOpt {
using OptimizationRemarkGetter =
@@ -589,6 +671,12 @@ struct OpenMPOpt {
if (!RTCall)
return false;
+ OffloadArray OffloadArrays[3];
+ if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
+ return false;
+
+ LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
+
// TODO: Check if can be moved upwards.
bool WasSplit = false;
Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
@@ -603,6 +691,93 @@ struct OpenMPOpt {
return Changed;
}
+ /// Maps the values stored in the offload arrays passed as arguments to
+ /// \p RuntimeCall into the offload arrays in \p OAs.
+ bool getValuesInOffloadArrays(CallInst &RuntimeCall,
+ MutableArrayRef<OffloadArray> OAs) {
+ assert(OAs.size() == 3 && "Need space for three offload arrays!");
+
+ // A runtime call that involves memory offloading looks something like:
+ // call void @__tgt_target_data_begin_mapper(arg0, arg1,
+ // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
+ // ...)
+ // So, the idea is to access the allocas that allocate space for these
+ // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
+ // Therefore:
+ // i8** %offload_baseptrs.
+ const unsigned BasePtrsArgNum = 2;
+ Value *BasePtrsArg = RuntimeCall.getArgOperand(BasePtrsArgNum);
+ // i8** %offload_ptrs.
+ const unsigned PtrsArgNum = 3;
+ Value *PtrsArg = RuntimeCall.getArgOperand(PtrsArgNum);
+ // i64* %offload_sizes.
+ const unsigned SizesArgNum = 4;
+ Value *SizesArg = RuntimeCall.getArgOperand(SizesArgNum);
+
+ // Get values stored in **offload_baseptrs.
+ auto *V = getUnderlyingObject(BasePtrsArg);
+ if (!isa<AllocaInst>(V))
+ return false;
+ auto *BasePtrsArray = cast<AllocaInst>(V);
+ if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
+ return false;
+
+ // Get values stored in **offload_ptrs.
+ V = getUnderlyingObject(PtrsArg);
+ if (!isa<AllocaInst>(V))
+ return false;
+ auto *PtrsArray = cast<AllocaInst>(V);
+ if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
+ return false;
+
+ // Get values stored in **offload_sizes.
+ V = getUnderlyingObject(SizesArg);
+ // If it's a [constant] global array don't analyze it.
+ if (isa<GlobalValue>(V))
+ return isa<Constant>(V);
+ if (!isa<AllocaInst>(V))
+ return false;
+
+ auto *SizesArray = cast<AllocaInst>(V);
+ if (!OAs[2].initialize(*SizesArray, RuntimeCall))
+ return false;
+
+ return true;
+ }
+
+ /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
+ /// For now this is a way to test that the function getValuesInOffloadArrays
+ /// is working properly.
+ /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
+ void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
+ assert(OAs.size() == 3 && "There are three offload arrays to debug!");
+
+ LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
+ std::string ValuesStr;
+ raw_string_ostream Printer(ValuesStr);
+ std::string Separator = " --- ";
+
+ for (auto *BP : OAs[0].StoredValues) {
+ BP->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
+ ValuesStr.clear();
+
+ for (auto *P : OAs[1].StoredValues) {
+ P->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
+ ValuesStr.clear();
+
+ for (auto *S : OAs[2].StoredValues) {
+ S->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
+ }
+
/// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
/// moved. Returns nullptr if the movement is not possible, or not worth it.
Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
new file mode 100644
index 0000000000000..96195dbecc006
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S -passes=openmpopt -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency -debug-only=openmp-opt < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+@.__omp_offloading_heavyComputation.region_id = weak constant i8 0
+@.offload_maptypes. = private unnamed_addr constant [2 x i64] [i64 35, i64 35]
+
+; CHECK-LABEL: {{[^@]+}}Successfully got offload values:
+; CHECK-NEXT: offload_baseptrs: double* %a --- %size.addr = alloca i32, align 4 ---
+; CHECK-NEXT: offload_ptrs: double* %a --- %size.addr = alloca i32, align 4 ---
+; CHECK-NEXT: offload_sizes: %0 = shl nuw nsw i64 %conv, 3 --- i64 4 ---
+
+;int heavyComputation(double* a, unsigned size) {
+; int random = rand() % 7;
+;
+; //#pragma omp target data map(a[0:size], size)
+; void* args[2];
+; args[0] = &a;
+; args[1] = &size;
+; __tgt_target_data_begin(..., args, ...)
+;
+; #pragma omp target teams
+; for (int i = 0; i < size; ++i) {
+; a[i] = ++a[i] * 3.141624;
+; }
+;
+; return random;
+;}
+define dso_local i32 @heavyComputation(double* %a, i32 %size) {
+entry:
+ %size.addr = alloca i32, align 4
+ %.offload_baseptrs = alloca [2 x i8*], align 8
+ %.offload_ptrs = alloca [2 x i8*], align 8
+ %.offload_sizes = alloca [2 x i64], align 8
+
+ store i32 %size, i32* %size.addr, align 4
+ %call = tail call i32 (...) @rand()
+
+ %conv = zext i32 %size to i64
+ %0 = shl nuw nsw i64 %conv, 3
+ %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0
+ %2 = bitcast [2 x i8*]* %.offload_baseptrs to double**
+ store double* %a, double** %2, align 8
+ %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0
+ %4 = bitcast [2 x i8*]* %.offload_ptrs to double**
+ store double* %a, double** %4, align 8
+ %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0
+ store i64 %0, i64* %5, align 8
+ %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1
+ %7 = bitcast i8** %6 to i32**
+ store i32* %size.addr, i32** %7, align 8
+ %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1
+ %9 = bitcast i8** %8 to i32**
+ store i32* %size.addr, i32** %9, align 8
+ %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
+ store i64 4, i64* %10, align 8
+ call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null)
+ %rem = srem i32 %call, 7
+ call void @__tgt_target_data_end_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null)
+ ret i32 %rem
+}
+
+declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
+declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
+
+declare dso_local i32 @rand(...)