[polly] r303404 - [ScopInfo] Gracefully handle long compile times
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Thu May 18 20:45:00 PDT 2017
Author: grosser
Date: Thu May 18 22:45:00 2017
New Revision: 303404
URL: http://llvm.org/viewvc/llvm-project?rev=303404&view=rev
Log:
[ScopInfo] Gracefully handle long compile times
The test case added in this commit made Polly compute the lexicographic minimum
of the following set during alias analysis, which caused very long compile times:
[p_0, p_1, p_2, p_3, p_4, p_5] -> { MemRef0[i0] : (517p_3 >= 70944 - 298p_2 and
256i0 >= -71199 + 298p_2 + 517p_3 and 256i0 <= -70944 + 298p_2 + 517p_3) or
(409p_4 >= 57120 - 298p_2 and 256i0 >= -57375 + 298p_2 + 409p_4 and 256i0 <=
-57120 + 298p_2 + 409p_4) or (104p_4 >= 17329 + 149p_2 - 50p_3 and 128i0 >=
17328 + 149p_2 - 50p_3 - 104p_4 and 128i0 <= 17455 + 149p_2 - 50p_3 - 104p_4) or
(104p_4 <= 17328 + 149p_2 - 50p_3 and 128i0 >= 17201 + 149p_2 - 50p_3 - 104p_4
and 128i0 <= 17328 + 149p_2 - 50p_3 - 104p_4) or (409p_4 <= 57119 - 298p_2 and
256i0 >= -57120 + 298p_2 + 409p_4 and 256i0 <= -56865 + 298p_2 + 409p_4) or
(517p_3 <= 70943 - 298p_2 and 256i0 >= -70944 + 298p_2 + 517p_3 and 256i0 <=
-70689 + 298p_2 + 517p_3) or (p_1 >= 2 + 2p_0 and 298p_5 >= 70944 - 517p_3 and
256i0 >= -71199 + 517p_3 + 298p_5 and 256i0 <= -70944 + 517p_3 + 298p_5) or (p_1
>= 2 + 2p_0 and 298p_5 >= 57120 - 409p_4 and 256i0 >= -57375 + 409p_4 + 298p_5
and 256i0 <= -57120 + 409p_4 + 298p_5) or (p_1 >= 2 + 2p_0 and 149p_5 <= -17329
+ 50p_3 + 104p_4 and 128i0 >= 17328 - 50p_3 - 104p_4 + 149p_5 and 128i0 <=
17455 - 50p_3 - 104p_4 + 149p_5) or (p_1 >= 2 + 2p_0 and 149p_5 >= -17328 +
50p_3 + 104p_4 and 128i0 >= 17201 - 50p_3 - 104p_4 + 149p_5 and 128i0 <= 17328
- 50p_3 - 104p_4 + 149p_5) or (p_1 >= 2 + 2p_0 and 298p_5 <= 57119 - 409p_4 and
256i0 >= -57120 + 409p_4 + 298p_5 and 256i0 <= -56865 + 409p_4 + 298p_5) or
(p_1 >= 2 + 2p_0 and 298p_5 <= 70943 - 517p_3 and 256i0 >= -70944 + 517p_3 +
298p_5 and 256i0 <= -70689 + 517p_3 + 298p_5) }
We now guard the potentially expensive isl computations in Polly's scop analysis
with a bound on the number of operations, so that the analysis bails out
gracefully instead of causing overly long compilation times.
Added:
polly/trunk/test/ScopInfo/long-compile-time-alias-analysis.ll
Modified:
polly/trunk/lib/Analysis/ScopInfo.cpp
Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=303404&r1=303403&r2=303404&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Thu May 18 22:45:00 2017
@@ -94,6 +94,12 @@ static int const MaxDisjunctsInDomain =
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;
+static cl::opt<int> OptComputeOut(
+ "polly-analysis-computeout",
+ cl::desc("Bound the dependence analysis by a maximal amount of "
+ "computational steps (0 means no bound)"),
+ cl::Hidden, cl::init(1000000), cl::ZeroOrMore, cl::cat(PollyCategory));
+
static cl::opt<bool> PollyRemarksMinimal(
"polly-remarks-minimal",
cl::desc("Do not emit remarks about assumptions that are known"),
@@ -2246,13 +2252,20 @@ void Scop::simplifyContexts() {
InvalidContext = isl_set_align_params(InvalidContext, getParamSpace());
}
+struct MinMaxData {
+ Scop::MinMaxVectorTy &MinMaxAccesses;
+ Scop &S;
+};
+
/// Add the minimal/maximal access in @p Set to @p User.
static isl_stat buildMinMaxAccess(__isl_take isl_set *Set, void *User) {
- Scop::MinMaxVectorTy *MinMaxAccesses = (Scop::MinMaxVectorTy *)User;
+ auto Data = (struct MinMaxData *)User;
+ Scop::MinMaxVectorTy *MinMaxAccesses = &Data->MinMaxAccesses;
isl_pw_multi_aff *MinPMA, *MaxPMA;
isl_pw_aff *LastDimAff;
isl_aff *OneAff;
unsigned Pos;
+ isl_ctx *Ctx = isl_set_get_ctx(Set);
Set = isl_set_remove_divs(Set);
@@ -2287,8 +2300,19 @@ static isl_stat buildMinMaxAccess(__isl_
}
}
- MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set));
- MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set));
+ {
+ IslMaxOperationsGuard MaxOpGuard(isl_set_get_ctx(Set), OptComputeOut);
+ MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set));
+ MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set));
+ }
+
+ if (isl_ctx_last_error(Ctx) == isl_error_quota) {
+ MinPMA = isl_pw_multi_aff_free(MinPMA);
+ MaxPMA = isl_pw_multi_aff_free(MaxPMA);
+ Set = isl_set_free(Set);
+ Data->S.invalidate(COMPLEXITY, DebugLoc());
+ return isl_stat_error;
+ }
MinPMA = isl_pw_multi_aff_coalesce(MinPMA);
MaxPMA = isl_pw_multi_aff_coalesce(MaxPMA);
@@ -2323,7 +2347,8 @@ static __isl_give isl_set *getAccessDoma
static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
Scop::MinMaxVectorTy &MinMaxAccesses) {
- MinMaxAccesses.reserve(AliasGroup.size());
+ struct MinMaxData Data = {MinMaxAccesses, S};
+ Data.MinMaxAccesses.reserve(AliasGroup.size());
isl_union_set *Domains = S.getDomains();
isl_union_map *Accesses = isl_union_map_empty(S.getParamSpace());
@@ -2335,8 +2360,8 @@ static bool calculateMinMaxAccess(Scop::
isl_union_set *Locations = isl_union_map_range(Accesses);
Locations = isl_union_set_coalesce(Locations);
Locations = isl_union_set_detect_equalities(Locations);
- bool Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess,
- &MinMaxAccesses));
+ bool Valid =
+ (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess, &Data));
isl_union_set_free(Locations);
return Valid;
}
Added: polly/trunk/test/ScopInfo/long-compile-time-alias-analysis.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/long-compile-time-alias-analysis.ll?rev=303404&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/long-compile-time-alias-analysis.ll (added)
+++ polly/trunk/test/ScopInfo/long-compile-time-alias-analysis.ll Thu May 18 22:45:00 2017
@@ -0,0 +1,235 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s
+
+; Verify that the compilation of this test case does not take infinite time.
+; At some point Polly tried to model this test case and got stuck
+; computing a lexicographic minimum. Today it should gracefully bail out.
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+%0 = type { i8*, i64, i64, i64, i64, i64, i64 }
+
+define void @_Z1fR1SS0_Ph(%0* nocapture readonly dereferenceable(56) %arg, %0* nocapture readonly dereferenceable(56) %arg1, i8* nocapture readonly %arg2) {
+bb:
+ %tmp = getelementptr inbounds %0, %0* %arg1, i64 0, i32 1
+ %tmp3 = getelementptr inbounds %0, %0* %arg, i64 0, i32 0
+ %tmp4 = load i8*, i8** %tmp3, align 8
+ %tmp5 = getelementptr inbounds %0, %0* %arg, i64 0, i32 4
+ %tmp6 = load i64, i64* %tmp5, align 8
+ %tmp7 = getelementptr inbounds %0, %0* %arg, i64 0, i32 1
+ %tmp8 = load i64, i64* %tmp7, align 8
+ %tmp9 = mul i64 %tmp8, %tmp6
+ %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
+ %tmp11 = getelementptr inbounds %0, %0* %arg, i64 0, i32 3
+ %tmp12 = load i64, i64* %tmp11, align 8
+ %tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 %tmp12
+ %tmp14 = getelementptr inbounds %0, %0* %arg, i64 0, i32 6
+ %tmp15 = load i64, i64* %tmp14, align 8
+ %tmp16 = add i64 %tmp15, 1
+ %tmp17 = icmp eq i64 %tmp16, %tmp6
+ br i1 %tmp17, label %bb51, label %bb18
+
+bb18: ; preds = %bb
+ %tmp19 = getelementptr inbounds %0, %0* %arg, i64 0, i32 2
+ %tmp20 = load i64, i64* %tmp19, align 8
+ %tmp21 = mul i64 %tmp20, %tmp8
+ %tmp22 = getelementptr inbounds i8, i8* %tmp13, i64 %tmp21
+ %tmp23 = getelementptr inbounds i8, i8* %tmp22, i64 %tmp9
+ %tmp24 = getelementptr inbounds i8, i8* %tmp23, i64 %tmp12
+ %tmp25 = bitcast %0* %arg1 to i16**
+ %tmp26 = load i16*, i16** %tmp25, align 8
+ %tmp27 = load i64, i64* %tmp, align 8
+ %tmp28 = getelementptr inbounds %0, %0* %arg1, i64 0, i32 4
+ %tmp29 = load i64, i64* %tmp28, align 8
+ %tmp30 = mul i64 %tmp27, %tmp29
+ %tmp31 = getelementptr inbounds i16, i16* %tmp26, i64 %tmp30
+ %tmp32 = getelementptr inbounds %0, %0* %arg1, i64 0, i32 3
+ %tmp33 = load i64, i64* %tmp32, align 8
+ %tmp34 = getelementptr inbounds i16, i16* %tmp31, i64 %tmp33
+ %tmp35 = getelementptr inbounds %0, %0* %arg, i64 0, i32 5
+ %tmp36 = load i64, i64* %tmp35, align 8
+ br label %bb37
+
+bb37: ; preds = %bb57, %bb18
+ %tmp38 = phi i64 [ %tmp6, %bb18 ], [ %tmp58, %bb57 ]
+ %tmp39 = phi i64 [ %tmp15, %bb18 ], [ %tmp59, %bb57 ]
+ %tmp40 = phi i64 [ %tmp27, %bb18 ], [ %tmp60, %bb57 ]
+ %tmp41 = phi i64 [ %tmp8, %bb18 ], [ %tmp61, %bb57 ]
+ %tmp42 = phi i64 [ %tmp12, %bb18 ], [ %tmp62, %bb57 ]
+ %tmp43 = phi i64 [ %tmp36, %bb18 ], [ %tmp63, %bb57 ]
+ %tmp44 = phi i16* [ %tmp34, %bb18 ], [ %tmp69, %bb57 ]
+ %tmp45 = phi i8* [ %tmp13, %bb18 ], [ %tmp64, %bb57 ]
+ %tmp46 = phi i8* [ %tmp24, %bb18 ], [ %tmp68, %bb57 ]
+ %tmp47 = phi i64 [ 0, %bb18 ], [ %tmp70, %bb57 ]
+ %tmp48 = add i64 %tmp43, 1
+ %tmp49 = sub i64 %tmp48, %tmp42
+ %tmp50 = icmp eq i64 %tmp49, 0
+ br i1 %tmp50, label %bb57, label %bb74
+
+bb51: ; preds = %bb57, %bb
+ ret void
+
+bb52: ; preds = %bb176
+ %tmp53 = load i64, i64* %tmp7, align 8
+ %tmp54 = load i64, i64* %tmp, align 8
+ %tmp55 = load i64, i64* %tmp14, align 8
+ %tmp56 = load i64, i64* %tmp5, align 8
+ br label %bb57
+
+bb57: ; preds = %bb52, %bb37
+ %tmp58 = phi i64 [ %tmp56, %bb52 ], [ %tmp38, %bb37 ]
+ %tmp59 = phi i64 [ %tmp55, %bb52 ], [ %tmp39, %bb37 ]
+ %tmp60 = phi i64 [ %tmp54, %bb52 ], [ %tmp40, %bb37 ]
+ %tmp61 = phi i64 [ %tmp53, %bb52 ], [ %tmp41, %bb37 ]
+ %tmp62 = phi i64 [ %tmp179, %bb52 ], [ %tmp42, %bb37 ]
+ %tmp63 = phi i64 [ %tmp178, %bb52 ], [ %tmp43, %bb37 ]
+ %tmp64 = getelementptr inbounds i8, i8* %tmp45, i64 %tmp61
+ %tmp65 = and i64 %tmp47, 1
+ %tmp66 = icmp eq i64 %tmp65, 0
+ %tmp67 = getelementptr inbounds i8, i8* %tmp46, i64 %tmp61
+ %tmp68 = select i1 %tmp66, i8* %tmp46, i8* %tmp67
+ %tmp69 = getelementptr inbounds i16, i16* %tmp44, i64 %tmp60
+ %tmp70 = add i64 %tmp47, 1
+ %tmp71 = add i64 %tmp59, 1
+ %tmp72 = sub i64 %tmp71, %tmp58
+ %tmp73 = icmp ult i64 %tmp70, %tmp72
+ br i1 %tmp73, label %bb37, label %bb51
+
+bb74: ; preds = %bb176, %bb37
+ %tmp75 = phi i64 [ %tmp181, %bb176 ], [ %tmp49, %bb37 ]
+ %tmp76 = phi i64 [ %tmp177, %bb176 ], [ 0, %bb37 ]
+ %tmp77 = getelementptr inbounds i8, i8* %tmp45, i64 %tmp76
+ %tmp78 = load i8, i8* %tmp77, align 1
+ %tmp79 = zext i8 %tmp78 to i32
+ %tmp80 = or i64 %tmp76, 1
+ %tmp81 = getelementptr inbounds i8, i8* %tmp45, i64 %tmp80
+ %tmp82 = load i8, i8* %tmp81, align 1
+ %tmp83 = zext i8 %tmp82 to i32
+ %tmp84 = getelementptr inbounds i8, i8* %tmp46, i64 %tmp76
+ %tmp85 = load i8, i8* %tmp84, align 1
+ %tmp86 = zext i8 %tmp85 to i32
+ %tmp87 = getelementptr inbounds i8, i8* %tmp46, i64 %tmp80
+ %tmp88 = load i8, i8* %tmp87, align 1
+ %tmp89 = zext i8 %tmp88 to i32
+ %tmp90 = mul nuw nsw i32 %tmp86, 517
+ %tmp91 = add nsw i32 %tmp90, -66176
+ %tmp92 = sub nsw i32 128, %tmp86
+ %tmp93 = mul nsw i32 %tmp92, 100
+ %tmp94 = sub nsw i32 128, %tmp89
+ %tmp95 = mul nsw i32 %tmp94, 208
+ %tmp96 = mul nuw nsw i32 %tmp89, 409
+ %tmp97 = add nsw i32 %tmp96, -52352
+ %tmp98 = mul nuw nsw i32 %tmp79, 298
+ %tmp99 = add nsw i32 %tmp98, -4768
+ %tmp100 = add nsw i32 %tmp91, %tmp99
+ %tmp101 = sdiv i32 %tmp100, 256
+ %tmp102 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp99, i32 %tmp95)
+ %tmp103 = extractvalue { i32, i1 } %tmp102, 1
+ br i1 %tmp103, label %bb104, label %bb105
+
+bb104: ; preds = %bb120, %bb109, %bb105, %bb74
+ tail call void @llvm.trap()
+ unreachable
+
+bb105: ; preds = %bb74
+ %tmp106 = extractvalue { i32, i1 } %tmp102, 0
+ %tmp107 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp106, i32 %tmp93)
+ %tmp108 = extractvalue { i32, i1 } %tmp107, 1
+ br i1 %tmp108, label %bb104, label %bb109
+
+bb109: ; preds = %bb105
+ %tmp110 = extractvalue { i32, i1 } %tmp107, 0
+ %tmp111 = sdiv i32 %tmp110, 256
+ %tmp112 = add nsw i32 %tmp97, %tmp99
+ %tmp113 = sdiv i32 %tmp112, 256
+ %tmp114 = mul nuw nsw i32 %tmp83, 298
+ %tmp115 = add nsw i32 %tmp114, -4768
+ %tmp116 = add nsw i32 %tmp91, %tmp115
+ %tmp117 = sdiv i32 %tmp116, 256
+ %tmp118 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp115, i32 %tmp95)
+ %tmp119 = extractvalue { i32, i1 } %tmp118, 1
+ br i1 %tmp119, label %bb104, label %bb120
+
+bb120: ; preds = %bb109
+ %tmp121 = extractvalue { i32, i1 } %tmp118, 0
+ %tmp122 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp121, i32 %tmp93)
+ %tmp123 = extractvalue { i32, i1 } %tmp122, 1
+ br i1 %tmp123, label %bb104, label %bb124
+
+bb124: ; preds = %bb120
+ %tmp125 = sext i32 %tmp101 to i64
+ %tmp126 = getelementptr inbounds i8, i8* %arg2, i64 %tmp125
+ %tmp127 = load i8, i8* %tmp126, align 1
+ %tmp128 = zext i8 %tmp127 to i32
+ %tmp129 = lshr i32 %tmp128, 3
+ %tmp130 = shl nuw nsw i32 %tmp129, 11
+ %tmp131 = sext i32 %tmp111 to i64
+ %tmp132 = getelementptr inbounds i8, i8* %arg2, i64 %tmp131
+ %tmp133 = load i8, i8* %tmp132, align 1
+ %tmp134 = zext i8 %tmp133 to i32
+ %tmp135 = lshr i32 %tmp134, 2
+ %tmp136 = shl nuw nsw i32 %tmp135, 5
+ %tmp137 = or i32 %tmp136, %tmp130
+ %tmp138 = sext i32 %tmp113 to i64
+ %tmp139 = getelementptr inbounds i8, i8* %arg2, i64 %tmp138
+ %tmp140 = load i8, i8* %tmp139, align 1
+ %tmp141 = zext i8 %tmp140 to i32
+ %tmp142 = lshr i32 %tmp141, 3
+ %tmp143 = or i32 %tmp137, %tmp142
+ %tmp144 = icmp ult i64 %tmp80, %tmp75
+ br i1 %tmp144, label %bb145, label %bb173
+
+bb145: ; preds = %bb124
+ %tmp146 = add nsw i32 %tmp97, %tmp115
+ %tmp147 = sdiv i32 %tmp146, 256
+ %tmp148 = sext i32 %tmp147 to i64
+ %tmp149 = getelementptr inbounds i8, i8* %arg2, i64 %tmp148
+ %tmp150 = load i8, i8* %tmp149, align 1
+ %tmp151 = extractvalue { i32, i1 } %tmp122, 0
+ %tmp152 = sdiv i32 %tmp151, 256
+ %tmp153 = sext i32 %tmp152 to i64
+ %tmp154 = getelementptr inbounds i8, i8* %arg2, i64 %tmp153
+ %tmp155 = load i8, i8* %tmp154, align 1
+ %tmp156 = sext i32 %tmp117 to i64
+ %tmp157 = getelementptr inbounds i8, i8* %arg2, i64 %tmp156
+ %tmp158 = load i8, i8* %tmp157, align 1
+ %tmp159 = zext i8 %tmp158 to i32
+ %tmp160 = lshr i32 %tmp159, 3
+ %tmp161 = shl nuw nsw i32 %tmp160, 11
+ %tmp162 = zext i8 %tmp155 to i32
+ %tmp163 = lshr i32 %tmp162, 2
+ %tmp164 = shl nuw nsw i32 %tmp163, 5
+ %tmp165 = zext i8 %tmp150 to i32
+ %tmp166 = lshr i32 %tmp165, 3
+ %tmp167 = or i32 %tmp164, %tmp166
+ %tmp168 = or i32 %tmp167, %tmp161
+ %tmp169 = shl nuw i32 %tmp168, 16
+ %tmp170 = or i32 %tmp169, %tmp143
+ %tmp171 = getelementptr inbounds i16, i16* %tmp44, i64 %tmp76
+ %tmp172 = bitcast i16* %tmp171 to i32*
+ store i32 %tmp170, i32* %tmp172, align 4
+ br label %bb176
+
+bb173: ; preds = %bb124
+ %tmp174 = trunc i32 %tmp143 to i16
+ %tmp175 = getelementptr inbounds i16, i16* %tmp44, i64 %tmp76
+ store i16 %tmp174, i16* %tmp175, align 2
+ br label %bb176
+
+bb176: ; preds = %bb173, %bb145
+ %tmp177 = add i64 %tmp76, 2
+ %tmp178 = load i64, i64* %tmp35, align 8
+ %tmp179 = load i64, i64* %tmp11, align 8
+ %tmp180 = add i64 %tmp178, 1
+ %tmp181 = sub i64 %tmp180, %tmp179
+ %tmp182 = icmp ult i64 %tmp177, %tmp181
+ br i1 %tmp182, label %bb74, label %bb52
+}
+
+; Function Attrs: noreturn nounwind
+declare void @llvm.trap() #0
+
+; Function Attrs: nounwind readnone speculatable
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
+
+attributes #0 = { noreturn nounwind }
+attributes #1 = { nounwind readnone speculatable }
More information about the llvm-commits
mailing list