[llvm] r343804 - [globalisel][combine] Improve the truncate placement for the extending-loads combine

Daniel Sanders via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 4 11:44:59 PDT 2018


Author: dsanders
Date: Thu Oct  4 11:44:58 2018
New Revision: 343804

URL: http://llvm.org/viewvc/llvm-project?rev=343804&view=rev
Log:
[globalisel][combine] Improve the truncate placement for the extending-loads combine

This brings the extending loads patch back to the original intent but minus the
PHI bug and with another small improvement to de-dupe truncates that are
inserted into the same block.

The truncates are sunk to their uses unless this would require inserting before a
phi, in which case they sink to the _beginning_ of the predecessor block for that
path (but no earlier than the def).

The reason for choosing the beginning of the predecessor is that it makes de-duping
multiple truncates in the same block simple, and optimized code is going to run a
scheduler at some point which will likely change the position anyway.


Modified:
    llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir

Modified: llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp?rev=343804&r1=343803&r2=343804&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp Thu Oct  4 11:44:58 2018
@@ -105,6 +105,37 @@ PreferredTuple ChoosePreferredUse(Prefer
   }
   return CurrentUse;
 }
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHIs is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+    std::function<void(MachineBasicBlock::iterator)> Inserter) {
+  MachineInstr &UseMI = *UseMO.getParent();
+
+  MachineBasicBlock *InsertBB = UseMI.getParent();
+
+  // If the use is a PHI then we want the predecessor block instead.
+  if (UseMI.isPHI()) {
+    MachineOperand *PredBB = std::next(&UseMO);
+    InsertBB = PredBB->getMBB();
+  }
+
+  // If the block is the same block as the def then we want to insert just after
+  // the def instead of at the start of the block.
+  if (InsertBB == DefMI.getParent()) {
+    MachineBasicBlock::iterator InsertPt = &DefMI;
+    Inserter(std::next(InsertPt));
+    return;
+  }
+
+  // Otherwise we want the start of the BB
+  Inserter(InsertBB->getFirstNonPHI());
+}
 } // end anonymous namespace
 
 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
@@ -153,22 +184,6 @@ bool CombinerHelper::tryCombineExtending
   // type since by definition the result of an extend is larger.
   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
 
-  // Rewrite the load and schedule the canonical use for erasure.
-  const auto TruncateUse = [&MI](MachineIRBuilder &Builder,
-                                 MachineOperand &UseMO, unsigned DstReg,
-                                 unsigned SrcReg) {
-    MachineInstr &UseMI = *UseMO.getParent();
-    MachineBasicBlock &UseMBB = *UseMI.getParent();
-
-    Builder.setInsertPt(UseMBB, MachineBasicBlock::iterator(UseMI));
-
-    if (UseMI.isPHI())
-      Builder.setInsertPt(*MI.getParent(),
-                          std::next(MachineBasicBlock::iterator(MI)));
-
-    Builder.buildTrunc(DstReg, SrcReg);
-  };
-
   // Rewrite the load to the chosen extending load.
   unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
   MI.setDesc(
@@ -180,7 +195,7 @@ bool CombinerHelper::tryCombineExtending
 
   // Rewrite all the uses to fix up the types.
   SmallVector<MachineInstr *, 1> ScheduleForErase;
-  SmallVector<std::pair<MachineOperand*, unsigned>, 4> ScheduleForAssignReg;
+  SmallVector<std::pair<MachineOperand *, MachineInstr *>, 4> ScheduleForInsert;
   for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
     MachineInstr *UseMI = UseMO.getParent();
 
@@ -204,7 +219,6 @@ bool CombinerHelper::tryCombineExtending
           //    ... = ... %2(s32)
           MRI.replaceRegWith(UseDstReg, ChosenDstReg);
           ScheduleForErase.push_back(UseMO.getParent());
-          Observer.erasedInstr(*UseMO.getParent());
         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
           // If the preferred size is smaller, then keep the extend but extend
           // from the result of the extending load. For example:
@@ -229,9 +243,11 @@ bool CombinerHelper::tryCombineExtending
           //    %4:_(s8) = G_TRUNC %2:_(s32)
           //    %3:_(s64) = G_ZEXT %2:_(s8)
           //    ... = ... %3(s64)
-          unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-          TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
-          ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
+          InsertInsnsWithoutSideEffectsBeforeUse(
+              Builder, MI, UseMO,
+              [&](MachineBasicBlock::iterator InsertBefore) {
+                ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+              });
         }
         continue;
       }
@@ -239,20 +255,40 @@ bool CombinerHelper::tryCombineExtending
       // We're going to update the load to def this value later so just erase
       // the old extend.
       ScheduleForErase.push_back(UseMO.getParent());
-      Observer.erasedInstr(*UseMO.getParent());
       continue;
     }
 
     // The use isn't an extend. Truncate back to the type we originally loaded.
     // This is free on many targets.
-    unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-    TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
-    ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
-  }
-  for (auto &Assignment : ScheduleForAssignReg)
-    Assignment.first->setReg(Assignment.second);
-  for (auto &EraseMI : ScheduleForErase)
+    InsertInsnsWithoutSideEffectsBeforeUse(
+        Builder, MI, UseMO, [&](MachineBasicBlock::iterator InsertBefore) {
+          ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+        });
+  }
+
+  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+  for (auto &InsertionInfo : ScheduleForInsert) {
+    MachineOperand *UseMO = InsertionInfo.first;
+    MachineInstr *InsertBefore = InsertionInfo.second;
+
+    MachineInstr *PreviouslyEmitted =
+        EmittedInsns.lookup(InsertBefore->getParent());
+    if (PreviouslyEmitted) {
+      UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+      continue;
+    }
+
+    Builder.setInsertPt(*InsertBefore->getParent(), InsertBefore);
+    unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+    EmittedInsns[InsertBefore->getParent()] = NewMI;
+    UseMO->setReg(NewDstReg);
+    Observer.createdInstr(*NewMI);
+  }
+  for (auto &EraseMI : ScheduleForErase) {
     EraseMI->eraseFromParent();
+    Observer.erasedInstr(*EraseMI);
+  }
   MI.getOperand(0).setReg(ChosenDstReg);
 
   return true;

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir?rev=343804&r1=343803&r2=343804&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir Thu Oct  4 11:44:58 2018
@@ -3,6 +3,7 @@
 --- |
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64--"
+
   define void @multiple_copies(i8* %addr) {
   entry:
     br i1 0, label %if, label %else
@@ -14,7 +15,7 @@
     ret void
   }
 
-  define void @sink_to_phi(i8* %addr) {
+  define void @sink_to_phi_trivially_dominating(i8* %addr) {
   entry:
     br i1 0, label %if, label %exit
   if:
@@ -22,6 +23,18 @@
   exit:
     ret void
   }
+
+  define void @sink_to_phi_nondominating(i8* %addr) {
+  entry:
+    br i1 0, label %if, label %else
+  if:
+    br label %exit
+  else:
+    br label %exit
+  exit:
+    ret void
+  }
+
 ...
 
 ---
@@ -70,8 +83,8 @@ body: |
 ...
 
 ---
-name:            sink_to_phi
-# CHECK-LABEL: name: sink_to_phi
+name:            sink_to_phi_trivially_dominating
+# CHECK-LABEL: name: sink_to_phi_trivially_dominating
 # This test currently tests that we don't sink if we would sink to a phi. This
 # is needed to avoid inserting into the middle of the leading G_PHI instructions
 # of a BB
@@ -109,3 +122,53 @@ body: |
     $w0 = COPY %3
     $w1 = COPY %9
 ...
+
+---
+name:            sink_to_phi_nondominating
+# CHECK-LABEL: name: sink_to_phi_nondominating
+# This test checks the case where the G_TRUNC cannot simply be sunk to its use
+# because that use is a G_PHI. Instead, a G_TRUNC is inserted at the beginning
+# of each predecessor block (bb.1.if and bb.2.else) feeding the PHI.
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $w1
+    successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%)
+    ; CHECK: [[T0:%[0-9]+]]:_(s32) = G_SEXTLOAD
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = COPY $w1
+    %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %3:_(s32) = G_SEXT %2
+    %4:_(s32) = G_CONSTANT i32 1
+    %5:_(s1) = G_ICMP intpred(ne), %1:_(s32), %4:_
+    G_BRCOND %5:_(s1), %bb.1
+    G_BR %bb.2.else
+  bb.1.if:
+  ; CHECK: bb.1.if:
+    successors: %bb.3(0x80000000)
+    %10:_(s8) = G_CONSTANT i8 1
+    ; CHECK: [[T1:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
+    %6:_(s8) = G_ADD %2, %10
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_ADD [[T1]], {{.*}}
+    G_BR %bb.3.exit
+  bb.2.else:
+  ; CHECK: bb.2.else:
+    successors: %bb.3(0x80000000)
+    %11:_(s8) = G_CONSTANT i8 1
+    ; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
+    %7:_(s8) = G_SUB %2, %11
+    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+    G_BR %bb.3.exit
+  bb.3.exit:
+  ; CHECK: bb.3.exit:
+    %8:_(s8) = G_PHI %6:_(s8), %bb.1, %7:_(s8), %bb.2
+    ; CHECK: [[T5:%[0-9]+]]:_(s8) = G_PHI [[T2]](s8), %bb.1, [[T4]](s8)
+    %9:_(s32) = G_ZEXT %8
+    ; CHECK: [[T6:%[0-9]+]]:_(s32) = G_ZEXT [[T5]](s8)
+    ; CHECK: $w0 = COPY [[T0]](s32)
+    ; CHECK: $w1 = COPY [[T6]](s32)
+    $w0 = COPY %3
+    $w1 = COPY %9
+...
+
+

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir?rev=343804&r1=343803&r2=343804&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir Thu Oct  4 11:44:58 2018
@@ -288,8 +288,7 @@ body: |
     ; CHECK: [[T1:%[0-9]+]]:_(s64) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
     ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
     ; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
-    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
-    ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T4]]
+    ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
     ; CHECK: $w0 = COPY [[T3]](s32)
     ; CHECK: $x1 = COPY [[T1]](s64)
     ; CHECK: $w2 = COPY [[T5]](s32)




More information about the llvm-commits mailing list