[polly] r307163 - [PPCGCodeGeneration] Teach Polly to start using live range reordering.

Siddharth Bhat via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 5 07:57:04 PDT 2017


Author: bollu
Date: Wed Jul  5 07:57:04 2017
New Revision: 307163

URL: http://llvm.org/viewvc/llvm-project?rev=307163&view=rev
Log:
[PPCGCodeGeneration] Teach Polly to start using live range reordering.

Polly did not use PPCG's live range reordering feature. Teach
PPCGCodeGeneration to use this.
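
Live range reordering lets PPCG relax false dependences on scalars
whose live range is confined to a single loop iteration, so that such
scalars can be privatized per GPU thread. A motivating example, adapted
from the test cases added in this commit:

    for (int i = 0; i < 1000; i++) {
      x = 0;            /* x is dead on entry to every iteration */
      if (control)
        x = C[i];
      B[i] = x * A[i];  /* the live range of x ends here */
    }

Without live range reordering, the dependences on `x` keep the loop
sequential; with it, every thread can keep its own copy of `x`.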

Documentation on this is sparse, so much of the code is conservative.

We currently kill all PHI nodes in a Scop by appending them to the
must_kill map we pass to PPCG. PHI nodes are not live outside the Scop,
so killing them at the end of its schedule should be safe. I do not
have a formal proof of correctness, but it seems intuitively correct.
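
Concretely, for a single PHI scalar (say `MemRef_x__phi`), the tagged
must-kill map we construct has roughly this shape (a sketch; the exact
phantom statement and reference names are generated by the code):

    [params] -> { [SKill_phantom_MemRef_x__phi[] -> ref_phantom[]]
                      -> MemRef_x__phi[] }

Each phantom kill statement is also sequenced at the end of the
schedule, so that PPCG considers the scalar dead afterwards.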

We also do not handle `array_order`, which PPCG/gpu.h describes as
"Order dependences on non-scalars." It appears to consist of RAW
dependences between arrays; we will need to pass this information to
handle more complex privatization cases.
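
My reading (unverified, with hypothetical statement names): a statement
writing A[i] that is later read by another statement would contribute
an order dependence along the lines of

    [params] -> { Stmt_write[i] -> Stmt_read[i] }

so that live range reordering does not reorder accesses to the same
array element. For now we pass an empty union map.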

Differential Revision: https://reviews.llvm.org/D34941

Added:
    polly/trunk/test/GPGPU/privatization-simple.ll
    polly/trunk/test/GPGPU/privatization.ll
Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/non-read-only-scalars.ll
    polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=307163&r1=307162&r2=307163&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Wed Jul  5 07:57:04 2017
@@ -112,6 +112,111 @@ static cl::opt<int>
                cl::desc("Minimal number of compute statements to run on GPU."),
                cl::Hidden, cl::init(10 * 512 * 512));
 
+/// Used to store information PPCG wants for kills. This information is
+/// used by live range reordering.
+///
+/// @see computeLiveRangeReordering
+/// @see GPUNodeBuilder::createPPCGScop
+/// @see GPUNodeBuilder::createPPCGProg
+struct MustKillsInfo {
+  /// Collection of all kill statements that will be sequenced at the end of
+  /// PPCGScop->schedule.
+  ///
+  /// The nodes in `KillsSchedule` will be merged using `isl_schedule_set`,
+  /// which merges schedules in *arbitrary* order (we do not care about the
+  /// order of the kills anyway).
+  isl::schedule KillsSchedule;
+  /// Map from kill statement instances to scalars that need to be
+  /// killed.
+  ///
+  /// We currently derive kill information only for PHI nodes, since they
+  /// are not alive outside the scop and can consequently all be "killed".
+  /// The map has the form:
+  ///     [params] -> { [Stmt_phantom[] -> ref_phantom[]] -> phi_ref[] }
+  isl::union_map TaggedMustKills;
+
+  MustKillsInfo() : KillsSchedule(nullptr), TaggedMustKills(nullptr) {}
+};
+
+/// Compute must-kills needed to enable live range reordering with PPCG.
+///
+/// @param S The Scop to compute live range reordering information for.
+/// @returns Live range reordering information that can be used to set up
+/// PPCG.
+static MustKillsInfo computeMustKillsInfo(const Scop &S) {
+  const isl::space ParamSpace(isl::manage(S.getParamSpace()));
+  MustKillsInfo Info;
+
+  // 1. Collect phi nodes in scop.
+  SmallVector<isl::id, 4> KillMemIds;
+  for (ScopArrayInfo *SAI : S.arrays()) {
+    if (!SAI->isPHIKind())
+      continue;
+
+    KillMemIds.push_back(isl::manage(SAI->getBasePtrId()));
+  }
+
+  Info.TaggedMustKills = isl::union_map::empty(isl::space(ParamSpace));
+
+  // Initialising KillsSchedule to `isl_set_empty` creates an empty node in the
+  // schedule:
+  //     - filter: "[control] -> { }"
+  // So, we choose not to create this to keep the output a little nicer,
+  // at the cost of some code complexity.
+  Info.KillsSchedule = nullptr;
+
+  for (isl::id &phiId : KillMemIds) {
+    isl::id KillStmtId = isl::id::alloc(
+        S.getIslCtx(), std::string("SKill_phantom_").append(phiId.get_name()),
+        nullptr);
+
+    // NOTE: construction of tagged_must_kill:
+    // 2. We need to construct a map:
+    //     [param] -> { [Stmt_phantom[] -> ref_phantom[]] -> phi_ref[] }
+    // To construct this, we use `isl_map_domain_product` on two maps:
+    // 2a. StmtToPhi:
+    //         [param] -> { Stmt_phantom[] -> phi_ref[] }
+    // 2b. PhantomRefToPhi:
+    //         [param] -> { ref_phantom[] -> phi_ref[] }
+    //
+    // Combining these with `isl_map_domain_product` gives us
+    // TaggedMustKill:
+    //     [param] -> { [Stmt_phantom[] -> ref_phantom[]] -> phi_ref[] }
+
+    // 2a. [param] -> { Stmt_phantom[] -> phi_ref[] }
+    isl::map StmtToPhi = isl::map::universe(isl::space(ParamSpace));
+    StmtToPhi = StmtToPhi.set_tuple_id(isl::dim::in, isl::id(KillStmtId));
+    StmtToPhi = StmtToPhi.set_tuple_id(isl::dim::out, isl::id(phiId));
+
+    isl::id PhantomRefId = isl::id::alloc(
+        S.getIslCtx(), std::string("ref_phantom") + phiId.get_name(), nullptr);
+
+    // 2b. [param] -> { ref_phantom[] -> phi_ref[] }
+    isl::map PhantomRefToPhi = isl::map::universe(isl::space(ParamSpace));
+    PhantomRefToPhi = PhantomRefToPhi.set_tuple_id(isl::dim::in, PhantomRefId);
+    PhantomRefToPhi = PhantomRefToPhi.set_tuple_id(isl::dim::out, phiId);
+
+    // 2. [param] -> { [Stmt_phantom[] -> ref_phantom[]] -> phi_ref[] }
+    isl::map TaggedMustKill = StmtToPhi.domain_product(PhantomRefToPhi);
+    Info.TaggedMustKills = Info.TaggedMustKills.unite(TaggedMustKill);
+
+    // 3. Create the kill schedule of the form:
+    //     "[param] -> { Stmt_phantom[] }"
+    // Then add this to Info.KillsSchedule.
+    isl::space KillStmtSpace = ParamSpace;
+    KillStmtSpace = KillStmtSpace.set_tuple_id(isl::dim::set, KillStmtId);
+    isl::union_set KillStmtDomain = isl::set::universe(KillStmtSpace);
+
+    isl::schedule KillSchedule = isl::schedule::from_domain(KillStmtDomain);
+    if (Info.KillsSchedule)
+      Info.KillsSchedule = Info.KillsSchedule.set(KillSchedule);
+    else
+      Info.KillsSchedule = KillSchedule;
+  }
+
+  return Info;
+}
+
 /// Create the ast expressions for a ScopStmt.
 ///
 /// This function is a callback to generate the ast expressions for each
@@ -2114,6 +2219,8 @@ public:
     auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop));
 
     PPCGScop->options = createPPCGOptions();
+    // Enable live range reordering.
+    PPCGScop->options->live_range_reordering = 1;
 
     PPCGScop->start = 0;
     PPCGScop->end = 0;
@@ -2129,10 +2236,9 @@ public:
     PPCGScop->tagged_must_writes = getTaggedMustWrites();
     PPCGScop->must_writes = S->getMustWrites();
     PPCGScop->live_out = nullptr;
-    PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace());
     PPCGScop->tagger = nullptr;
-
-    PPCGScop->independence = nullptr;
+    PPCGScop->independence =
+        isl_union_map_empty(isl_set_get_space(PPCGScop->context));
     PPCGScop->dep_flow = nullptr;
     PPCGScop->tagged_dep_flow = nullptr;
     PPCGScop->dep_false = nullptr;
@@ -2141,8 +2247,15 @@ public:
     PPCGScop->tagged_dep_order = nullptr;
 
     PPCGScop->schedule = S->getScheduleTree();
-    PPCGScop->names = getNames();
 
+    MustKillsInfo KillsInfo = computeMustKillsInfo(*S);
+    // If we have something non-trivial to kill, add it to the schedule.
+    if (KillsInfo.KillsSchedule.get())
+      PPCGScop->schedule = isl_schedule_sequence(
+          PPCGScop->schedule, KillsInfo.KillsSchedule.take());
+    PPCGScop->tagged_must_kills = KillsInfo.TaggedMustKills.take();
+
+    PPCGScop->names = getNames();
     PPCGScop->pet = nullptr;
 
     compute_tagger(PPCGScop);
@@ -2414,7 +2527,13 @@ public:
     PPCGProg->to_inner = getArrayIdentity();
     PPCGProg->to_outer = getArrayIdentity();
     PPCGProg->any_to_outer = nullptr;
-    PPCGProg->array_order = nullptr;
+
+    // This needs to be set when live range reordering is enabled.
+    // NOTE: I believe this is conservatively correct; I am not sure
+    //       what the exact semantics are.
+    // Quoting PPCG/gpu.h: "Order dependences on non-scalars."
+    PPCGProg->array_order =
+        isl_union_map_empty(isl_set_get_space(PPCGScop->context));
     PPCGProg->n_stmts = std::distance(S->begin(), S->end());
     PPCGProg->stmts = getStatements();
     PPCGProg->n_array = std::distance(S->array_begin(), S->array_end());
@@ -2424,7 +2543,6 @@ public:
     createArrays(PPCGProg);
 
     PPCGProg->may_persist = compute_may_persist(PPCGProg);
-
     return PPCGProg;
   }
 

Modified: polly/trunk/test/GPGPU/non-read-only-scalars.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/non-read-only-scalars.ll?rev=307163&r1=307162&r2=307163&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/non-read-only-scalars.ll (original)
+++ polly/trunk/test/GPGPU/non-read-only-scalars.ll Wed Jul  5 07:57:04 2017
@@ -67,7 +67,6 @@
 
 ; CODE:   }
 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (32) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(&MemRef_sum_0__phi, dev_MemRef_sum_0__phi, sizeof(float), cudaMemcpyDeviceToHost));
 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(&MemRef_sum_0, dev_MemRef_sum_0, sizeof(float), cudaMemcpyDeviceToHost));
 ; CODE-NEXT: }
 

Modified: polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll?rev=307163&r1=307162&r2=307163&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll (original)
+++ polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll Wed Jul  5 07:57:04 2017
@@ -24,9 +24,8 @@ target triple = "x86_64-unknown-linux-gn
 ; CODE-NEXT:     cudaCheckKernel();
 ; CODE-NEXT:   }
 
-; CODE:   cudaCheckReturn(cudaMemcpy(&MemRef_out_l_055__phi, dev_MemRef_out_l_055__phi, sizeof(i32), cudaMemcpyDeviceToHost));
-; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(&MemRef_out_l_055, dev_MemRef_out_l_055, sizeof(i32), cudaMemcpyDeviceToHost));
-; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
+; CODE:       cudaCheckReturn(cudaMemcpy(&MemRef_out_l_055, dev_MemRef_out_l_055, sizeof(i32), cudaMemcpyDeviceToHost));
+; CODE-NEXT:  cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
 ; CODE-NEXT: }
 
 ; CODE: # kernel0
@@ -41,9 +40,7 @@ target triple = "x86_64-unknown-linux-gn
 ; IR:      [[REGA:%.+]] = bitcast i32* %out_l.055.phiops to i8*
 ; IR-NEXT: call void @polly_copyFromHostToDevice(i8* [[REGA]], i8* %p_dev_array_MemRef_out_l_055__phi, i64 4)
 
-; IR:      [[REGB:%.+]] = bitcast i32* %out_l.055.phiops to i8*
-; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_out_l_055__phi, i8* [[REGB]], i64 4)
-; IR-NEXT: [[REGC:%.+]] = bitcast i32* %out_l.055.s2a to i8*
+; IR: [[REGC:%.+]] = bitcast i32* %out_l.055.s2a to i8*
 ; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_out_l_055, i8* [[REGC]], i64 4)
 
 ; KERNEL-IR: entry:

Added: polly/trunk/test/GPGPU/privatization-simple.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/privatization-simple.ll?rev=307163&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/privatization-simple.ll (added)
+++ polly/trunk/test/GPGPU/privatization-simple.ll Wed Jul  5 07:57:04 2017
@@ -0,0 +1,56 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP
+; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR
+
+; SCOP:      Function: f
+; SCOP-NEXT: Region: %for.body---%for.end
+; SCOP-NEXT: Max Loop Depth:  1
+
+; Check that a kernel launch is generated in the host IR.
+; The declare would not be generated unless a call to a kernel exists.
+; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)
+
+; void f(int A[], int B[], int control, int C[]) {
+;     int x;
+; #pragma scop
+;     for(int i = 0; i < 1000; i ++) {
+;         x = 0;
+;         if(control) x = C[i];
+;         B[i] = x * A[i];
+; 
+;     }
+; #pragma endscop
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32* %B, i32 %control, i32* %C) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %if.end
+  %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %if.end ]
+  %tobool = icmp eq i32 %control, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+  %tmp4 = load i32, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %for.body, %if.then
+  %x.0 = phi i32 [ %tmp4, %if.then ], [ 0, %for.body ]
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp8 = load i32, i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %tmp8, %x.0
+  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  store i32 %mul, i32* %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %if.end
+  ret void
+}

Added: polly/trunk/test/GPGPU/privatization.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/privatization.ll?rev=307163&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/privatization.ll (added)
+++ polly/trunk/test/GPGPU/privatization.ll Wed Jul  5 07:57:04 2017
@@ -0,0 +1,60 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP
+; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR
+
+; SCOP:      Function: checkPrivatization
+; SCOP-NEXT: Region: %for.body---%for.end
+; SCOP-NEXT: Max Loop Depth:  1
+
+
+; Check that a kernel launch is generated in the host IR.
+; The declare would not be generated unless a call to a kernel exists.
+; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)
+
+; 
+;
+;    void checkPrivatization(int A[], int B[], int C[], int control) {
+;      int x;
+;    #pragma scop
+;      for (int i = 0; i < 1000; i++) {
+;        x = 0;
+;        if (control)
+;          x += C[i];
+;
+;        B[i] = x * A[i];
+;      }
+;    #pragma endscop
+;    }
+;
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @checkPrivatization(i32* %A, i32* %B, i32* %C, i32 %control) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %if.end
+  %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %if.end ]
+  %tobool = icmp eq i32 %control, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+  %tmp4 = load i32, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %for.body, %if.then
+  %x.0 = phi i32 [ %tmp4, %if.then ], [ 0, %for.body ]
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp9 = load i32, i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %tmp9, %x.0
+  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  store i32 %mul, i32* %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %if.end
+  ret void
+}
