[polly] r309728 - [Simplify] Improve scalability.
Michael Kruse via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 1 12:39:11 PDT 2017
Author: meinersbur
Date: Tue Aug 1 12:39:11 2017
New Revision: 309728
URL: http://llvm.org/viewvc/llvm-project?rev=309728&view=rev
Log:
[Simplify] Improve scalability.
With a lot of reads and writes to the same array in a statement,
some isl sets that capture the state between access can become
complex such that isl takes more considerable time and memory
for operations on them.
The problems identified were:
- is_subset() takes considerable time with many disjoints in the
arguments. We limit the number of disjoints to 4, any additional
information is thrown away.
- subtract() can lead to many disjoints. We instead assume that any
array element is possibly accessed, which removes all disjoints.
- subtract_domain() may lead to considerable processing, even if all
elements are are to be removed. Instead, we remove determine and
remove the affected spaces manually. No behaviour is changed.
Added:
polly/trunk/test/Simplify/scalability1.ll
polly/trunk/test/Simplify/scalability2.ll
Modified:
polly/trunk/lib/Transform/Simplify.cpp
Modified: polly/trunk/lib/Transform/Simplify.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/Simplify.cpp?rev=309728&r1=309727&r2=309728&view=diff
==============================================================================
--- polly/trunk/lib/Transform/Simplify.cpp (original)
+++ polly/trunk/lib/Transform/Simplify.cpp Tue Aug 1 12:39:11 2017
@@ -27,6 +27,12 @@ using namespace polly;
namespace {
+/// Number of max disjuncts we allow in removeOverwrites(). This is to avoid
+/// that the analysis of accesses in a statement is becoming too complex. Chosen
+/// to be relatively small because all the common cases should access only few
+/// array elements per statement.
+static int const SimplifyMaxDisjuncts = 4;
+
STATISTIC(ScopsProcessed, "Number of SCoPs processed");
STATISTIC(ScopsModified, "Number of SCoPs simplified");
@@ -57,6 +63,45 @@ static bool isImplicitWrite(MemoryAccess
return MA->isWrite() && MA->isOriginalScalarKind();
}
+/// Like isl::union_map::add_map, but may also return an underapproximated
+/// result if getting too complex.
+///
+/// This is implemented by adding disjuncts to the results until the limit is
+/// reached.
+static isl::union_map underapproximatedAddMap(isl::union_map UMap,
+ isl::map Map) {
+ if (UMap.is_null() || Map.is_null())
+ return {};
+
+ isl::map PrevMap = UMap.extract_map(Map.get_space());
+
+ // Fast path: If known that we cannot exceed the disjunct limit, just add
+ // them.
+ if (isl_map_n_basic_map(PrevMap.get()) + isl_map_n_basic_map(Map.get()) <=
+ SimplifyMaxDisjuncts)
+ return UMap.add_map(Map);
+
+ isl::map Result = isl::map::empty(PrevMap.get_space());
+ PrevMap.foreach_basic_map([&Result](isl::basic_map BMap) -> isl::stat {
+ if (isl_map_n_basic_map(Result.get()) > SimplifyMaxDisjuncts)
+ return isl::stat::error;
+ Result = Result.unite(BMap);
+ return isl::stat::ok;
+ });
+ Map.foreach_basic_map([&Result](isl::basic_map BMap) -> isl::stat {
+ if (isl_map_n_basic_map(Result.get()) > SimplifyMaxDisjuncts)
+ return isl::stat::error;
+ Result = Result.unite(BMap);
+ return isl::stat::ok;
+ });
+
+ isl::union_map UResult =
+ UMap.subtract(isl::map::universe(PrevMap.get_space()));
+ UResult.add_map(Result);
+
+ return UResult;
+}
+
/// Return a vector that contains MemoryAccesses in the order in
/// which they are executed.
///
@@ -223,7 +268,10 @@ private:
// If a value is read in-between, do not consider it as overwritten.
if (MA->isRead()) {
- WillBeOverwritten = WillBeOverwritten.subtract(AccRel);
+ // Invalidate all overwrites for the array it accesses to avoid too
+ // complex isl sets.
+ isl::map AccRelUniv = isl::map::universe(AccRel.get_space());
+ WillBeOverwritten = WillBeOverwritten.subtract(AccRelUniv);
continue;
}
@@ -239,8 +287,12 @@ private:
}
// Unconditional writes overwrite other values.
- if (MA->isMustWrite())
- WillBeOverwritten = WillBeOverwritten.add_map(AccRel);
+ if (MA->isMustWrite()) {
+ // Avoid too complex isl sets. If necessary, throw away some of the
+ // knowledge.
+ WillBeOverwritten =
+ underapproximatedAddMap(WillBeOverwritten, AccRel);
+ }
}
}
}
@@ -385,14 +437,31 @@ private:
// from the list of eligible writes. Don't just remove the accessed
// elements, but any MemoryAccess that touches any of the invalidated
// elements.
- // { MemoryAccess[] }
- isl::union_set TouchedAccesses =
- FutureWrites.intersect_domain(AccRelWrapped)
- .range()
- .unwrap()
- .range();
- FutureWrites =
- FutureWrites.uncurry().subtract_range(TouchedAccesses).curry();
+ SmallPtrSet<MemoryAccess *, 2> TouchedAccesses;
+ FutureWrites.intersect_domain(AccRelWrapped)
+ .foreach_map([&TouchedAccesses](isl::map Map) -> isl::stat {
+ MemoryAccess *MA = (MemoryAccess *)Map.get_space()
+ .range()
+ .unwrap()
+ .get_tuple_id(isl::dim::out)
+ .get_user();
+ TouchedAccesses.insert(MA);
+ return isl::stat::ok;
+ });
+ isl::union_map NewFutureWrites =
+ isl::union_map::empty(FutureWrites.get_space());
+ FutureWrites.foreach_map([&TouchedAccesses, &NewFutureWrites](
+ isl::map FutureWrite) -> isl::stat {
+ MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space()
+ .range()
+ .unwrap()
+ .get_tuple_id(isl::dim::out)
+ .get_user();
+ if (!TouchedAccesses.count(MA))
+ NewFutureWrites = NewFutureWrites.add_map(FutureWrite);
+ return isl::stat::ok;
+ });
+ FutureWrites = NewFutureWrites;
if (MA->isMustWrite() && !ValSet.is_null()) {
// { MemoryAccess[] }
Added: polly/trunk/test/Simplify/scalability1.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Simplify/scalability1.ll?rev=309728&view=auto
==============================================================================
--- polly/trunk/test/Simplify/scalability1.ll (added)
+++ polly/trunk/test/Simplify/scalability1.ll Tue Aug 1 12:39:11 2017
@@ -0,0 +1,104 @@
+; RUN: opt %loadPolly -polly-ignore-inbounds -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+;
+; Test scalability.
+;
+define void @func(i32 %n, double* noalias nonnull %A,
+i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %p7, i32 %p8, i32 %p9,
+i32 %p10, i32 %p11, i32 %p12, i32 %p13, i32 %p14, i32 %p15, i32 %p16, i32 %p17, i32 %p18, i32 %p19,
+i32 %p20, i32 %p21, i32 %p22, i32 %p23, i32 %p24, i32 %p25, i32 %p26, i32 %p27, i32 %p28, i32 %p29,
+i32 %p30, i32 %p31, i32 %p32) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+
+ body:
+ %A0 = getelementptr inbounds double, double* %A, i32 %p0
+ %A1 = getelementptr inbounds double, double* %A, i32 %p1
+ %A2 = getelementptr inbounds double, double* %A, i32 %p2
+ %A3 = getelementptr inbounds double, double* %A, i32 %p3
+ %A4 = getelementptr inbounds double, double* %A, i32 %p4
+ %A5 = getelementptr inbounds double, double* %A, i32 %p5
+ %A6 = getelementptr inbounds double, double* %A, i32 %p6
+ %A7 = getelementptr inbounds double, double* %A, i32 %p7
+ %A8 = getelementptr inbounds double, double* %A, i32 %p8
+ %A9 = getelementptr inbounds double, double* %A, i32 %p9
+ %A10 = getelementptr inbounds double, double* %A, i32 %p10
+ %A11 = getelementptr inbounds double, double* %A, i32 %p11
+ %A12 = getelementptr inbounds double, double* %A, i32 %p12
+ %A13 = getelementptr inbounds double, double* %A, i32 %p13
+ %A14 = getelementptr inbounds double, double* %A, i32 %p14
+ %A15 = getelementptr inbounds double, double* %A, i32 %p15
+ %A16 = getelementptr inbounds double, double* %A, i32 %p16
+ %A17 = getelementptr inbounds double, double* %A, i32 %p17
+ %A18 = getelementptr inbounds double, double* %A, i32 %p18
+ %A19 = getelementptr inbounds double, double* %A, i32 %p19
+ %A20 = getelementptr inbounds double, double* %A, i32 %p20
+ %A21 = getelementptr inbounds double, double* %A, i32 %p21
+ %A22 = getelementptr inbounds double, double* %A, i32 %p22
+ %A23 = getelementptr inbounds double, double* %A, i32 %p23
+ %A24 = getelementptr inbounds double, double* %A, i32 %p24
+ %A25 = getelementptr inbounds double, double* %A, i32 %p25
+ %A26 = getelementptr inbounds double, double* %A, i32 %p26
+ %A27 = getelementptr inbounds double, double* %A, i32 %p27
+ %A28 = getelementptr inbounds double, double* %A, i32 %p28
+ %A29 = getelementptr inbounds double, double* %A, i32 %p29
+ %A30 = getelementptr inbounds double, double* %A, i32 %p30
+ %A31 = getelementptr inbounds double, double* %A, i32 %p31
+ %A32 = getelementptr inbounds double, double* %A, i32 %p32
+
+ %val = load double, double* %A0
+
+ store double %val, double* %A1
+ store double %val, double* %A2
+ store double %val, double* %A3
+ store double %val, double* %A4
+ store double %val, double* %A5
+ store double %val, double* %A6
+ store double %val, double* %A7
+ store double %val, double* %A8
+ store double %val, double* %A9
+ store double %val, double* %A10
+ store double %val, double* %A11
+ store double %val, double* %A12
+ store double %val, double* %A13
+ store double %val, double* %A14
+ store double %val, double* %A15
+ store double %val, double* %A16
+ store double %val, double* %A17
+ store double %val, double* %A18
+ store double %val, double* %A19
+ store double %val, double* %A20
+ store double %val, double* %A21
+ store double %val, double* %A22
+ store double %val, double* %A23
+ store double %val, double* %A24
+ store double %val, double* %A25
+ store double %val, double* %A26
+ store double %val, double* %A27
+ store double %val, double* %A28
+ store double %val, double* %A29
+ store double %val, double* %A30
+ store double %val, double* %A31
+ store double %val, double* %A32
+
+ br label %inc
+
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: SCoP could not be simplified
Added: polly/trunk/test/Simplify/scalability2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Simplify/scalability2.ll?rev=309728&view=auto
==============================================================================
--- polly/trunk/test/Simplify/scalability2.ll (added)
+++ polly/trunk/test/Simplify/scalability2.ll Tue Aug 1 12:39:11 2017
@@ -0,0 +1,188 @@
+; RUN: opt %loadPolly -polly-ignore-inbounds -polly-simplify -analyze < %s | FileCheck %s -match-full-lines
+;
+; Test scalability.
+;
+define void @func(i32 %n, double* noalias nonnull %A,
+i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %p7, i32 %p8, i32 %p9,
+i32 %p10, i32 %p11, i32 %p12, i32 %p13, i32 %p14, i32 %p15, i32 %p16, i32 %p17, i32 %p18, i32 %p19,
+i32 %p20, i32 %p21, i32 %p22, i32 %p23, i32 %p24, i32 %p25, i32 %p26, i32 %p27, i32 %p28, i32 %p29,
+i32 %p30, i32 %p31, i32 %p32, i32 %p33, i32 %p34, i32 %p35, i32 %p36, i32 %p37, i32 %p38, i32 %p39,
+i32 %p40, i32 %p41, i32 %p42, i32 %p43, i32 %p44, i32 %p45, i32 %p46, i32 %p47, i32 %p48, i32 %p49,
+i32 %p50, i32 %p51, i32 %p52, i32 %p53, i32 %p54, i32 %p55, i32 %p56, i32 %p57, i32 %p58, i32 %p59) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+
+ body:
+ %A0 = getelementptr inbounds double, double* %A, i32 %p0
+ %A1 = getelementptr inbounds double, double* %A, i32 %p1
+ %A2 = getelementptr inbounds double, double* %A, i32 %p2
+ %A3 = getelementptr inbounds double, double* %A, i32 %p3
+ %A4 = getelementptr inbounds double, double* %A, i32 %p4
+ %A5 = getelementptr inbounds double, double* %A, i32 %p5
+ %A6 = getelementptr inbounds double, double* %A, i32 %p6
+ %A7 = getelementptr inbounds double, double* %A, i32 %p7
+ %A8 = getelementptr inbounds double, double* %A, i32 %p8
+ %A9 = getelementptr inbounds double, double* %A, i32 %p9
+ %A10 = getelementptr inbounds double, double* %A, i32 %p10
+ %A11 = getelementptr inbounds double, double* %A, i32 %p11
+ %A12 = getelementptr inbounds double, double* %A, i32 %p12
+ %A13 = getelementptr inbounds double, double* %A, i32 %p13
+ %A14 = getelementptr inbounds double, double* %A, i32 %p14
+ %A15 = getelementptr inbounds double, double* %A, i32 %p15
+ %A16 = getelementptr inbounds double, double* %A, i32 %p16
+ %A17 = getelementptr inbounds double, double* %A, i32 %p17
+ %A18 = getelementptr inbounds double, double* %A, i32 %p18
+ %A19 = getelementptr inbounds double, double* %A, i32 %p19
+ %A20 = getelementptr inbounds double, double* %A, i32 %p20
+ %A21 = getelementptr inbounds double, double* %A, i32 %p21
+ %A22 = getelementptr inbounds double, double* %A, i32 %p22
+ %A23 = getelementptr inbounds double, double* %A, i32 %p23
+ %A24 = getelementptr inbounds double, double* %A, i32 %p24
+ %A25 = getelementptr inbounds double, double* %A, i32 %p25
+ %A26 = getelementptr inbounds double, double* %A, i32 %p26
+ %A27 = getelementptr inbounds double, double* %A, i32 %p27
+ %A28 = getelementptr inbounds double, double* %A, i32 %p28
+ %A29 = getelementptr inbounds double, double* %A, i32 %p29
+ %A30 = getelementptr inbounds double, double* %A, i32 %p30
+ %A31 = getelementptr inbounds double, double* %A, i32 %p31
+ %A32 = getelementptr inbounds double, double* %A, i32 %p32
+ %A33 = getelementptr inbounds double, double* %A, i32 %p33
+ %A34 = getelementptr inbounds double, double* %A, i32 %p34
+ %A35 = getelementptr inbounds double, double* %A, i32 %p35
+ %A36 = getelementptr inbounds double, double* %A, i32 %p36
+ %A37 = getelementptr inbounds double, double* %A, i32 %p37
+ %A38 = getelementptr inbounds double, double* %A, i32 %p38
+ %A39 = getelementptr inbounds double, double* %A, i32 %p39
+ %A40 = getelementptr inbounds double, double* %A, i32 %p40
+ %A41 = getelementptr inbounds double, double* %A, i32 %p41
+ %A42 = getelementptr inbounds double, double* %A, i32 %p42
+ %A43 = getelementptr inbounds double, double* %A, i32 %p43
+ %A44 = getelementptr inbounds double, double* %A, i32 %p44
+ %A45 = getelementptr inbounds double, double* %A, i32 %p45
+ %A46 = getelementptr inbounds double, double* %A, i32 %p46
+ %A47 = getelementptr inbounds double, double* %A, i32 %p47
+ %A48 = getelementptr inbounds double, double* %A, i32 %p48
+ %A49 = getelementptr inbounds double, double* %A, i32 %p49
+ %A50 = getelementptr inbounds double, double* %A, i32 %p50
+ %A51 = getelementptr inbounds double, double* %A, i32 %p51
+ %A52 = getelementptr inbounds double, double* %A, i32 %p52
+ %A53 = getelementptr inbounds double, double* %A, i32 %p53
+ %A54 = getelementptr inbounds double, double* %A, i32 %p54
+ %A55 = getelementptr inbounds double, double* %A, i32 %p55
+ %A56 = getelementptr inbounds double, double* %A, i32 %p56
+ %A57 = getelementptr inbounds double, double* %A, i32 %p57
+ %A58 = getelementptr inbounds double, double* %A, i32 %p58
+ %A59 = getelementptr inbounds double, double* %A, i32 %p59
+
+ %val0 = load double, double* %A0
+ store double %val0, double* %A1
+
+ %val2 = load double, double* %A2
+ store double %val2, double* %A3
+
+ %val4 = load double, double* %A4
+ store double %val4, double* %A5
+
+ %val6 = load double, double* %A6
+ store double %val6, double* %A7
+
+ %val8 = load double, double* %A8
+ store double %val8, double* %A9
+
+ %val10 = load double, double* %A10
+ store double %val10, double* %A11
+
+ %val12 = load double, double* %A12
+ store double %val12, double* %A13
+
+ %val13 = load double, double* %A13
+ store double %val13, double* %A15
+
+ %val16 = load double, double* %A16
+ store double %val16, double* %A17
+
+ %val18 = load double, double* %A18
+ store double %val18, double* %A19
+
+ %val20 = load double, double* %A20
+ store double %val20, double* %A21
+
+ %val22 = load double, double* %A22
+ store double %val22, double* %A23
+
+ %val24 = load double, double* %A24
+ store double %val24, double* %A25
+
+ %val26 = load double, double* %A26
+ store double %val26, double* %A27
+
+ %val28 = load double, double* %A28
+ store double %val28, double* %A29
+
+ %val30 = load double, double* %A30
+ store double %val30, double* %A31
+
+ %val32 = load double, double* %A32
+ store double %val32, double* %A33
+
+ %val34 = load double, double* %A34
+ store double %val34, double* %A35
+
+ %val36 = load double, double* %A36
+ store double %val36, double* %A37
+
+ %val38 = load double, double* %A38
+ store double %val38, double* %A39
+
+ %val40 = load double, double* %A40
+ store double %val40, double* %A41
+
+ %val42 = load double, double* %A42
+ store double %val42, double* %A43
+
+ %val44 = load double, double* %A44
+ store double %val44, double* %A45
+
+ %val46 = load double, double* %A46
+ store double %val46, double* %A47
+
+ %val48 = load double, double* %A48
+ store double %val48, double* %A49
+
+ %val50 = load double, double* %A50
+ store double %val50, double* %A51
+
+ %val52 = load double, double* %A52
+ store double %val52, double* %A53
+
+ %val54 = load double, double* %A54
+ store double %val54, double* %A55
+
+ %val56 = load double, double* %A56
+ store double %val56, double* %A57
+
+ %val58 = load double, double* %A58
+ store double %val58, double* %A59
+
+ br label %inc
+
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: SCoP could not be simplified
More information about the llvm-commits
mailing list