[polly] r245303 - Use isl_set_is_subset instead of isl_set_is_equal
Roman Gareev via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 18 09:12:06 PDT 2015
Author: romangareev
Date: Tue Aug 18 11:12:05 2015
New Revision: 245303
URL: http://llvm.org/viewvc/llvm-project?rev=245303&view=rev
Log:
Use isl_set_is_subset instead of isl_set_is_equal
This helps MemoryAccess::isStrideX detect the correct stride when Stride
has parametric constraints.
Reviewers: grosser
Added:
polly/trunk/test/ScopInfo/kernel_gemm___%for.cond.1.preheader---%for.end.12.jscop
polly/trunk/test/ScopInfo/stride_detection.ll
Modified:
polly/trunk/lib/Analysis/ScopInfo.cpp
Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=245303&r1=245302&r2=245303&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Tue Aug 18 11:12:05 2015
@@ -609,7 +609,7 @@ bool MemoryAccess::isStrideX(__isl_take
Stride = getStride(Schedule);
StrideX = isl_set_universe(isl_set_get_space(Stride));
StrideX = isl_set_fix_si(StrideX, isl_dim_set, 0, StrideWidth);
- IsStrideX = isl_set_is_equal(Stride, StrideX);
+ IsStrideX = isl_set_is_subset(Stride, StrideX);
isl_set_free(StrideX);
isl_set_free(Stride);
Added: polly/trunk/test/ScopInfo/kernel_gemm___%for.cond.1.preheader---%for.end.12.jscop
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/kernel_gemm___%25for.cond.1.preheader---%25for.end.12.jscop?rev=245303&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/kernel_gemm___%for.cond.1.preheader---%for.end.12.jscop (added)
+++ polly/trunk/test/ScopInfo/kernel_gemm___%for.cond.1.preheader---%for.end.12.jscop Tue Aug 18 11:12:05 2015
@@ -0,0 +1,25 @@
+{
+ "context" : "[nk] -> { : nk <= 2147483647 and nk >= -2147483648 }",
+ "name" : "for.cond.1.preheader => polly.merge_new_and_old",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "read",
+ "relation" : "[nk] -> { Stmt_for_body_3[i0, i1] -> MemRef_A[i1] }"
+ },
+ {
+ "kind" : "read",
+ "relation" : "[nk] -> { Stmt_for_body_3[i0, i1] -> MemRef_C[i0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[nk] -> { Stmt_for_body_3[i0, i1] -> MemRef_C[i0] }"
+ }
+ ],
+ "domain" : "[nk] -> { Stmt_for_body_3[i0, i1] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= -1 + nk and nk >= 1 }",
+ "name" : "Stmt_for_body_3",
+ "schedule" : "[nk] -> { Stmt_for_body_3[i0, i1] -> [o0, o1, o2, i1 - 32o1, i0 - 32o0 - 4o2] : 4o2 >= -3 + i0 - 32o0 and 4o2 <= i0 - 32o0 and 32o1 >= -31 + i1 and 32o1 <= i1 and 32o0 <= i0 and 32o0 >= -31 + i0 }"
+ }
+ ]
+}
Added: polly/trunk/test/ScopInfo/stride_detection.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/stride_detection.ll?rev=245303&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/stride_detection.ll (added)
+++ polly/trunk/test/ScopInfo/stride_detection.ll Tue Aug 18 11:12:05 2015
@@ -0,0 +1,63 @@
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-vectorizer=polly -polly-codegen < %s -S | FileCheck %s
+
+; #pragma known-parallel
+; for (int c0 = 0; c0 <= 31; c0 += 1)
+; for (int c1 = 0; c1 <= floord(nk - 1, 32); c1 += 1)
+; for (int c2 = 0; c2 <= 7; c2 += 1)
+; for (int c3 = 0; c3 <= min(31, nk - 32 * c1 - 1); c3 += 1)
+; #pragma simd
+; for (int c4 = 0; c4 <= 3; c4 += 1)
+; Stmt_for_body_3(32 * c0 + 4 * c2 + c4, 32 * c1 + c3);
+
+; CHECK: polly.stmt.for.body.3: ; preds = %polly.loop_header18
+; CHECK: %scevgep = getelementptr [1024 x double], [1024 x double]* %A, i64 0, i64 %21
+; CHECK: %_p_vec_p = bitcast double* %scevgep to <1 x double>*
+; CHECK: %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !1, !noalias !3, !llvm.mem.parallel_loop_access !0
+; CHECK: %_p_splat = shufflevector <1 x double> %_p_splat_one, <1 x double> %_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %scevgep26 = getelementptr [1024 x double], [1024 x double]* %C, i64 0, i64 %19
+; CHECK: %vector_ptr = bitcast double* %scevgep26 to <4 x double>*
+; CHECK: %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+; CHECK: %addp_vec = fadd <4 x double> %_p_splat, %_p_vec_full
+; CHECK: %40 = extractelement <4 x double> %addp_vec, i32 0
+; CHECK: %41 = extractelement <4 x double> %addp_vec, i32 1
+; CHECK: %42 = extractelement <4 x double> %addp_vec, i32 2
+; CHECK: %43 = extractelement <4 x double> %addp_vec, i32 3
+; CHECK: %vector_ptr27 = bitcast double* %scevgep26 to <4 x double>*
+; CHECK: store <4 x double> %addp_vec, <4 x double>* %vector_ptr27, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+
+define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %C, [1024 x double]* %A) #0 {
+entry:
+ br label %for.cond.1.preheader
+
+for.cond.1.preheader: ; preds = %entry, %for.inc.10
+ %indvars.iv16 = phi i64 [ 0, %entry ], [ %indvars.iv.next17, %for.inc.10 ]
+ %cmp2.13 = icmp sgt i32 %nk, 0
+ br i1 %cmp2.13, label %for.body.3.lr.ph, label %for.inc.10
+
+for.body.3.lr.ph: ; preds = %for.cond.1.preheader
+ br label %for.body.3
+
+for.body.3: ; preds = %for.body.3.lr.ph, %for.body.3
+ %indvars.iv = phi i64 [ 0, %for.body.3.lr.ph ], [ %indvars.iv.next, %for.body.3 ]
+ %arrayidx5 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 0, i64 %indvars.iv
+ %0 = load double, double* %arrayidx5, align 8
+ %arrayidx9 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 0, i64 %indvars.iv16
+ %1 = load double, double* %arrayidx9, align 8
+ %add = fadd double %0, %1
+ store double %add, double* %arrayidx9, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %nk
+ br i1 %exitcond, label %for.body.3, label %for.cond.1.for.inc.10_crit_edge
+
+for.cond.1.for.inc.10_crit_edge: ; preds = %for.body.3
+ br label %for.inc.10
+
+for.inc.10: ; preds = %for.cond.1.for.inc.10_crit_edge, %for.cond.1.preheader
+ %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
+ %exitcond18 = icmp ne i64 %indvars.iv.next17, 1024
+ br i1 %exitcond18, label %for.cond.1.preheader, label %for.end.12
+
+for.end.12: ; preds = %for.inc.10
+ ret void
+}
More information about the llvm-commits mailing list