[llvm] r327742 - [Hexagon] Avoid bank conflicts in post-RA scheduler

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 16 13:55:49 PDT 2018


Author: kparzysz
Date: Fri Mar 16 13:55:49 2018
New Revision: 327742

URL: http://llvm.org/viewvc/llvm-project?rev=327742&view=rev
Log:
[Hexagon] Avoid bank conflicts in post-RA scheduler

Avoid scheduling two loads in such a way that they would end up in the
same packet. If there is a load in a packet, try to schedule a non-load
next.

Patch by Brendon Cahoon.

Added:
    llvm/trunk/test/CodeGen/Hexagon/bank-conflict.mir
Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h

Modified: llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp?rev=327742&r1=327741&r2=327742&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp Fri Mar 16 13:55:49 2018
@@ -31,6 +31,7 @@ void HexagonHazardRecognizer::Reset() {
   PacketNum = 0;
   UsesDotCur = nullptr;
   DotCurPNum = -1;
+  UsesLoad = false;
   RegDefs.clear();
 }
 
@@ -78,15 +79,20 @@ void HexagonHazardRecognizer::AdvanceCyc
     UsesDotCur = nullptr;
     DotCurPNum = -1;
   }
+  UsesLoad = false;
   PacketNum++;
   RegDefs.clear();
 }
 
-/// If a packet contains a dot cur instruction, then we may prefer the
-/// instruction that can use the dot cur result. Or, if the use
-/// isn't scheduled in the same packet, then prefer other instructions
-/// in the subsequent packet.
+/// Handle the cases when we prefer one instruction over another. Case 1 - we
+/// prefer not to generate multiple loads in the packet to avoid a potential
+/// bank conflict. Case 2 - if a packet contains a dot cur instruction, then we
+/// prefer the instruction that can use the dot cur result. However, if the use
+/// is not scheduled in the same packet, then prefer other instructions in the
+/// subsequent packet.
 bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+  if (UsesLoad && SU->isInstr() && SU->getInstr()->mayLoad())
+    return true;
   return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum));
 }
 
@@ -137,4 +143,6 @@ void HexagonHazardRecognizer::EmitInstru
     UsesDotCur = nullptr;
     DotCurPNum = -1;
   }
+
+  UsesLoad = MI->mayLoad();
 }

Modified: llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h?rev=327742&r1=327741&r2=327742&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h Fri Mar 16 13:55:49 2018
@@ -30,6 +30,8 @@ class HexagonHazardRecognizer : public S
   // The packet number when a dor cur is emitted. If its use is not generated
   // in the same packet, then try to wait another cycle before emitting.
   int DotCurPNum;
+  // True if the last emitted instruction may load; prefer a non-load next.
+  bool UsesLoad = false;
   // The set of registers defined by instructions in the current packet.
   SmallSet<unsigned, 8> RegDefs;
 

Added: llvm/trunk/test/CodeGen/Hexagon/bank-conflict.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/bank-conflict.mir?rev=327742&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/bank-conflict.mir (added)
+++ llvm/trunk/test/CodeGen/Hexagon/bank-conflict.mir Fri Mar 16 13:55:49 2018
@@ -0,0 +1,156 @@
+# RUN: llc -march=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s
+
+# Test that the Post RA scheduler does not schedule back-to-back loads
+# when there is another instruction to schedule. The scheduler avoids
+# the back-to-back loads to reduce potential bank conflicts.
+
+# CHECK: = L2_loadrigp
+# CHECK: = A2_tfr
+# CHECK: = L2_loadrigp
+
+# CHECK: = L4_loadri_rr
+# CHECK: = S2_tstbit_i
+# CHECK: = L4_loadri_rr
+
+--- |
+  %s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
+  %s.1 = type { i32, i32 }
+
+  @g0 = global i64 0
+  @g1 = global i64 0
+  @g2 = global i32 0
+  @g3 = global i32 0
+  @g4 = global i8 0
+
+  declare i32 @llvm.hexagon.S2.cl0(i32) #0
+  declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) #0
+  declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) #0
+  declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) #0
+  declare i64 @llvm.hexagon.A2.vaddws(i64, i64) #0
+  declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #0
+
+  define void @f0(i32 %a0) {
+  b0:
+    %v0 = bitcast [10 x %s.0]* inttoptr (i32 -121502345 to [10 x %s.0]*) to [10 x %s.0]*
+    br label %b1
+
+  b1:                                               ; preds = %b5, %b0
+    %v1 = phi i32 [ 0, %b0 ], [ %v28, %b5 ]
+    %v2 = phi i32 [ 0, %b0 ], [ %v27, %b5 ]
+    %v3 = load i32, i32* @g2, align 4
+    %v4 = load i32, i32* @g3, align 8
+    %v5 = and i32 %v4, %v3
+    %v6 = getelementptr [10 x %s.0], [10 x %s.0]* %v0, i32 0, i32 %v2
+    %v7 = bitcast %s.0* %v6 to %s.0*
+    %v8 = getelementptr %s.0, %s.0* %v7, i32 0, i32 12
+    %v9 = getelementptr %s.0, %s.0* %v7, i32 0, i32 13
+    br label %b2
+
+  b2:                                               ; preds = %b4, %b1
+    %v10 = phi i64 [ %v24, %b4 ], [ 0, %b1 ]
+    %v11 = phi i32 [ %v13, %b4 ], [ %v5, %b1 ]
+    %v12 = tail call i32 @llvm.hexagon.S2.cl0(i32 %v11)
+    %v13 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %v11, i32 %v12)
+    %v14 = getelementptr [24 x i32], [24 x i32]* %v8, i32 0, i32 %v12
+    %v15 = load i32, i32* %v14, align 4
+    %v16 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %v15, i32 %v15)
+    %v17 = getelementptr [24 x i32], [24 x i32]* %v9, i32 0, i32 %v12
+    %v18 = load i32, i32* %v17, align 4
+    %v19 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %v16, i32 %v18, i32 %v18)
+    %v20 = load i8, i8* @g4, align 1
+    %v21 = and i8 %v20, 1
+    %v22 = icmp eq i8 %v21, 0
+    br i1 %v22, label %b3, label %b4
+
+  b3:                                               ; preds = %b2
+    %v23 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %v10, i64 %v19)
+    store i64 %v23, i64* @g0, align 8
+    br label %b4
+
+  b4:                                               ; preds = %b3, %b2
+    %v24 = phi i64 [ %v23, %b3 ], [ %v10, %b2 ]
+    %v25 = icmp eq i32 %v13, 0
+    br i1 %v25, label %b5, label %b2
+
+  b5:                                               ; preds = %b4
+    %v26 = add i32 %v2, 1
+    %v27 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %v26, i32 10)
+    %v28 = add i32 %v1, 1
+    %v29 = icmp eq i32 %v28, %a0
+    br i1 %v29, label %b6, label %b1
+
+  b6:                                               ; preds = %b5
+    store i64 %v19, i64* @g1, align 8
+    ret void
+  }
+
+  attributes #0 = { nounwind readnone }
+
+...
+---
+name:            f0
+alignment:       4
+tracksRegLiveness: true
+registers:
+liveins:
+  - { reg: '$r0', virtual-reg: '' }
+fixedStack:
+stack:
+constants:
+body:             |
+  bb.0:
+    successors: %bb.1(0x80000000)
+    liveins: $r0:0x00000001
+
+    $r3 = A2_tfrsi 0
+    $r2 = A2_tfrsi -121502345
+    $r4 = A2_tfrsi 10
+    J2_loop0r %bb.1, killed $r0, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+
+  bb.1 (address-taken):
+    successors: %bb.2(0x80000000)
+    liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004
+
+    $r5 = M2_mpysip $r3, 1824
+    $r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load 4 from @g2)
+    $r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load 4 from @g3, align 8)
+    $r6 = A2_tfr $r5
+    $r7 = A2_and killed $r8, killed $r7
+    $r5 = M2_accii killed $r5, $r2, 1248
+    $r6 = M2_accii killed $r6, $r2, 1152
+    $d0 = A2_tfrpi 0
+
+  bb.2:
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+    liveins: $lc0:0x00000004, $r0:0x00000001, $r1:0x00000001, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r5:0x00000001, $r6:0x00000001, $r7:0x00000001, $sa0:0x00000004
+
+    $r8 = S2_cl0 $r7
+    $r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load 1 from @g4)
+    $r7 = S2_setbit_r killed $r7, $r8
+    $r9 = L4_loadri_rr $r6, $r8, 2 :: (load 4 from %ir.v14)
+    $r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load 4 from %ir.v17)
+    $d4 = M2_vmpy2s_s0 killed $r9, $r9, implicit-def dead $usr_ovf
+    $p0 = S2_tstbit_i killed $r12, 0
+    $d4 = M2_vmac2s_s0 killed $d4, killed $r13, $r13, implicit-def dead $usr_ovf
+    $p1 = C2_cmpeqi $r7, 0
+    $d6 = A2_vaddws $d0, $d4, implicit-def dead $usr_ovf
+    $d0 = A2_tfrpt $p0, killed $d0, implicit $d0
+    S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store 8 into @g0)
+    $d0 = A2_tfrpf killed $p0, killed $d6, implicit killed $d0
+    J2_jumpf killed $p1, %bb.2, implicit-def dead $pc
+
+  bb.3:
+    successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+    liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r8:0x00000001, $r9:0x00000001, $sa0:0x00000004
+
+    $r3 = A2_addi killed $r3, 1
+    $r3 = A4_modwrapu killed $r3, $r4
+    ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+
+  bb.4:
+    liveins: $r8:0x00000001, $r9:0x00000001
+
+    S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store 8 into @g1)
+    PS_jmpret killed $r31, implicit-def dead $pc
+...
+




More information about the llvm-commits mailing list