[PATCH] D80097: [PowerPC] add more high latency opcodes for machine combiner pass

Sun May 17 18:38:04 PDT 2020

shchenz created this revision.
shchenz added reviewers: hfinkel, PowerPC.
Herald added subscribers: llvm-commits, steven.zhang, wuzish, kbarton, hiraditya, nemanjai.
Herald added a project: LLVM.

This patch adds more high-latency associative and commutative opcodes to the machine combiner pass on PowerPC.

For now, the machine combiner pass on PowerPC only reassociates operations whose latency is at least 5 cycles, based on the Power9 User Manual (UM).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80097

Files:
  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
  llvm/test/CodeGen/PowerPC/machine-combiner.ll


Index: llvm/test/CodeGen/PowerPC/machine-combiner.ll
===================================================================

--- llvm/test/CodeGen/PowerPC/machine-combiner.ll
+++ llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -189,9 +189,9 @@
 define i32 @reassociate_mullw(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
 ; FIXPOINT-LABEL: reassociate_mullw:
 ; FIXPOINT:       # %bb.0:
-; FIXPOINT:       mullw 3, 3, 4
-; FIXPOINT:       mullw 3, 3, 5
-; FIXPOINT:       mullw 3, 3, 6
+; FIXPOINT:       mullw [[REG0:[0-9]+]], 3, 4
+; FIXPOINT:       mullw [[REG1:[0-9]+]], 5, 6
+; FIXPOINT:       mullw 3, [[REG0]], [[REG1]]
 ; FIXPOINT-NEXT:  blr
 
   %t0 = mul i32 %x0, %x1
@@ -203,9 +203,9 @@
 define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
 ; FIXPOINT-LABEL: reassociate_mulld:
 ; FIXPOINT:       # %bb.0:
-; FIXPOINT:       mulld 3, 3, 4
-; FIXPOINT:       mulld 3, 3, 5
-; FIXPOINT:       mulld 3, 3, 6
+; FIXPOINT:       mulld [[REG0:[0-9]+]], 3, 4
+; FIXPOINT:       mulld [[REG1:[0-9]+]], 5, 6
+; FIXPOINT:       mulld 3, [[REG0]], [[REG1]]
 ; FIXPOINT-NEXT:  blr
 
   %t0 = mul i64 %x0, %x1
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -248,10 +248,11 @@
 // This function does not list all associative and commutative operations, but
 // only those worth feeding through the machine combiner in an attempt to
 // reduce the critical path. Mostly, this means floating-point operations,
-// because they have high latencies (compared to other operations, such and
+// because they have high latencies(>=5) (compared to other operations, such as
 // and/or, which are also associative and commutative, but have low latencies).
 bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   switch (Inst.getOpcode()) {
+  // Floating point:
   // FP Add:
   case PPC::FADD:
   case PPC::FADDS:
@@ -280,6 +281,13 @@
   case PPC::QVFMULSs:
     return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
            Inst.getFlag(MachineInstr::MIFlag::FmNsz);
+  // Fixed point:
+  // Multiply:
+  case PPC::MULHD:
+  case PPC::MULLD:
+  case PPC::MULHW:
+  case PPC::MULLW:
+    return true;
   default:
     return false;
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80097.264524.patch
Type: text/x-patch
Size: 2328 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200518/19a0cc48/attachment.bin>