[llvm] r351139 - [AArch64] Add new target feature to fuse arithmetic and logic operations

Evandro Menezes via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 14 15:54:36 PST 2019


Author: evandro
Date: Mon Jan 14 15:54:36 2019
New Revision: 351139

URL: http://llvm.org/viewvc/llvm-project?rev=351139&view=rev
Log:
[AArch64] Add new target feature to fuse arithmetic and logic operations

This feature enables the fusion of some arithmetic and logic instructions
together.

Differential revision: https://reviews.llvm.org/D56572

Added:
    llvm/trunk/test/CodeGen/AArch64/misched-fusion-arith-logic.mir
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64.td
    llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h

Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=351139&r1=351138&r2=351139&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Mon Jan 14 15:54:36 2019
@@ -188,14 +188,18 @@ def FeatureFuseAES : SubtargetFeature<
     "fuse-aes", "HasFuseAES", "true",
     "CPU fuses AES crypto operations">;
 
-def FeatureFuseCryptoEOR : SubtargetFeature<
-    "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
-    "CPU fuses AES/PMULL and EOR operations">;
+def FeatureFuseArithmeticLogic : SubtargetFeature<
+    "fuse-arith-logic", "HasFuseArithmeticLogic", "true",
+    "CPU fuses arithmetic and logic operations">;
 
 def FeatureFuseCCSelect : SubtargetFeature<
     "fuse-csel", "HasFuseCCSelect", "true",
     "CPU fuses conditional select operations">;
 
+def FeatureFuseCryptoEOR : SubtargetFeature<
+    "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
+    "CPU fuses AES/PMULL and EOR operations">;
+
 def FeatureFuseLiterals : SubtargetFeature<
     "fuse-literals", "HasFuseLiterals", "true",
     "CPU fuses literal generation operations">;

Modified: llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp?rev=351139&r1=351138&r2=351139&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp Mon Jan 14 15:54:36 2019
@@ -270,7 +270,107 @@ static bool isCCSelectPair(const Machine
   return false;
 }
 
-/// Check if the instr pair, FirstMI and SecondMI, should be fused
+// Arithmetic and logic.
+static bool isArithmeticLogicPair(const MachineInstr *FirstMI,
+                                  const MachineInstr &SecondMI) {
+  if (AArch64InstrInfo::hasShiftedReg(SecondMI))
+    return false;
+
+  switch (SecondMI.getOpcode()) {
+  // Arithmetic
+  case AArch64::ADDWrr:
+  case AArch64::ADDXrr:
+  case AArch64::SUBWrr:
+  case AArch64::SUBXrr:
+  case AArch64::ADDWrs:
+  case AArch64::ADDXrs:
+  case AArch64::SUBWrs:
+  case AArch64::SUBXrs:
+  // Logic
+  case AArch64::ANDWrr:
+  case AArch64::ANDXrr:
+  case AArch64::BICWrr:
+  case AArch64::BICXrr:
+  case AArch64::EONWrr:
+  case AArch64::EONXrr:
+  case AArch64::EORWrr:
+  case AArch64::EORXrr:
+  case AArch64::ORNWrr:
+  case AArch64::ORNXrr:
+  case AArch64::ORRWrr:
+  case AArch64::ORRXrr:
+  case AArch64::ANDWrs:
+  case AArch64::ANDXrs:
+  case AArch64::BICWrs:
+  case AArch64::BICXrs:
+  case AArch64::EONWrs:
+  case AArch64::EONXrs:
+  case AArch64::EORWrs:
+  case AArch64::EORXrs:
+  case AArch64::ORNWrs:
+  case AArch64::ORNXrs:
+  case AArch64::ORRWrs:
+  case AArch64::ORRXrs:
+    // Assume the 1st instr to be a wildcard if it is unspecified.
+    if (FirstMI == nullptr)
+      return true;
+
+    // Arithmetic
+    switch (FirstMI->getOpcode()) {
+    case AArch64::ADDWrr:
+    case AArch64::ADDXrr:
+    case AArch64::ADDSWrr:
+    case AArch64::ADDSXrr:
+    case AArch64::SUBWrr:
+    case AArch64::SUBXrr:
+    case AArch64::SUBSWrr:
+    case AArch64::SUBSXrr:
+      return true;
+    case AArch64::ADDWrs:
+    case AArch64::ADDXrs:
+    case AArch64::ADDSWrs:
+    case AArch64::ADDSXrs:
+    case AArch64::SUBWrs:
+    case AArch64::SUBXrs:
+    case AArch64::SUBSWrs:
+    case AArch64::SUBSXrs:
+      return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+    }
+    break;
+
+  // Arithmetic, setting flags.
+  case AArch64::ADDSWrr:
+  case AArch64::ADDSXrr:
+  case AArch64::SUBSWrr:
+  case AArch64::SUBSXrr:
+  case AArch64::ADDSWrs:
+  case AArch64::ADDSXrs:
+  case AArch64::SUBSWrs:
+  case AArch64::SUBSXrs:
+    // Assume the 1st instr to be a wildcard if it is unspecified.
+    if (FirstMI == nullptr)
+      return true;
+
+    // Arithmetic, not setting flags.
+    switch (FirstMI->getOpcode()) {
+    case AArch64::ADDWrr:
+    case AArch64::ADDXrr:
+    case AArch64::SUBWrr:
+    case AArch64::SUBXrr:
+      return true;
+    case AArch64::ADDWrs:
+    case AArch64::ADDXrs:
+    case AArch64::SUBWrs:
+    case AArch64::SUBXrs:
+      return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+    }
+    break;
+  }
+
+  return false;
+}
+
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -295,6 +395,8 @@ static bool shouldScheduleAdjacent(const
     return true;
   if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI))
     return true;
+  if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
+    return true;
 
   return false;
 }

Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=351139&r1=351138&r2=351139&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Mon Jan 14 15:54:36 2019
@@ -166,8 +166,9 @@ protected:
   bool HasArithmeticCbzFusion = false;
   bool HasFuseAddress = false;
   bool HasFuseAES = false;
-  bool HasFuseCryptoEOR = false;
+  bool HasFuseArithmeticLogic = false;
   bool HasFuseCCSelect = false;
+  bool HasFuseCryptoEOR = false;
   bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
@@ -311,14 +312,16 @@ public:
   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
   bool hasFuseAddress() const { return HasFuseAddress; }
   bool hasFuseAES() const { return HasFuseAES; }
-  bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
+  bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
   bool hasFuseCCSelect() const { return HasFuseCCSelect; }
+  bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
   bool hasFuseLiterals() const { return HasFuseLiterals; }
 
   /// Return true if the CPU supports any kind of instruction fusion.
   bool hasFusion() const {
     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
-           hasFuseAES() || hasFuseCCSelect() || hasFuseLiterals();
+           hasFuseAES() || hasFuseArithmeticLogic() ||
+           hasFuseCCSelect() || hasFuseLiterals();
   }
 
   bool useRSqrt() const { return UseRSqrt; }

Added: llvm/trunk/test/CodeGen/AArch64/misched-fusion-arith-logic.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-fusion-arith-logic.mir?rev=351139&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-fusion-arith-logic.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/misched-fusion-arith-logic.mir Mon Jan 14 15:54:36 2019
@@ -0,0 +1,111 @@
+# RUN: llc -o - %s -mtriple aarch64-unknown -mattr=fuse-arith-logic -run-pass=machine-scheduler -misched-print-dags |& FileCheck %s
+# REQUIRES: asserts
+
+---
+name: arith
+body: |
+  bb.0.entry:
+    %0:gpr32 = SUBWrr undef $w0, undef $w1
+    %1:gpr32 = ADDWrr undef $w1, undef $w2
+    %2:gpr32 = SUBWrs %0, undef $w2, 0
+    %3:gpr32 = ADDWrs %1, undef $w3, 0
+
+    ; CHECK: SU(0): %0:gpr32 = SUBWrr undef $w0, undef $w1
+    ; CHECK: Successors:
+    ; CHECK: SU(2): Ord  Latency=0 Cluster
+    ; CHECK: SU(1): %1:gpr32 = ADDWrr undef $w1, undef $w2
+    ; CHECK: Successors:
+    ; CHECK: SU(3): Ord  Latency=0 Cluster
+    ; CHECK: SU(2): dead %2:gpr32 = SUBWrs %0:gpr32, undef $w2, 0
+    ; CHECK: Predecessors:
+    ; CHECK: SU(0): Ord  Latency=0 Cluster
+    ; CHECK: SU(3): dead %3:gpr32 = ADDWrs %1:gpr32, undef $w3, 0
+    ; CHECK: Predecessors:
+    ; CHECK: SU(1): Ord  Latency=0 Cluster
+...
+---
+name: compare
+body: |
+  bb.0.entry:
+    %0:gpr64 = ADDXrr undef $x0, undef $x1
+    %1:gpr64 = SUBXrs undef $x1, undef $x2, 0
+    %2:gpr64 = ADDSXrr %0, undef $x3, implicit-def $nzcv
+    %3:gpr64 = SUBSXrs %1, undef $x4, 0, implicit-def $nzcv
+
+    ; CHECK: SU(0): %0:gpr64 = ADDXrr undef $x0, undef $x1
+    ; CHECK: Successors:
+    ; CHECK: SU(2): Ord  Latency=0 Cluster
+    ; CHECK: SU(1): %1:gpr64 = SUBXrs undef $x1, undef $x2, 0
+    ; CHECK: Successors:
+    ; CHECK: SU(3): Ord  Latency=0 Cluster
+    ; CHECK: SU(2): dead %2:gpr64 = ADDSXrr %0:gpr64, undef $x3, implicit-def $nzcv
+    ; CHECK: Predecessors:
+    ; CHECK: SU(0): Ord  Latency=0 Cluster
+    ; CHECK: SU(3): dead %3:gpr64 = SUBSXrs %1:gpr64, undef $x4, 0, implicit-def $nzcv
+    ; CHECK: Predecessors:
+    ; CHECK: SU(1): Ord  Latency=0 Cluster
+...
+---
+name: logic
+body: |
+  bb.0.entry:
+    %0:gpr32 = ADDWrr undef $w0, undef $w1
+    %1:gpr64 = SUBXrs undef $x1, undef $x2, 0
+    %3:gpr32 = ANDWrs %0, undef $w3, 0
+    %4:gpr64 = ORRXrr %1, undef $x4
+
+    ; CHECK: SU(0): %0:gpr32 = ADDWrr undef $w0, undef $w1
+    ; CHECK: Successors:
+    ; CHECK: SU(2): Ord  Latency=0 Cluster
+    ; CHECK: SU(1): %1:gpr64 = SUBXrs undef $x1, undef $x2, 0
+    ; CHECK: Successors:
+    ; CHECK: SU(3): Ord  Latency=0 Cluster
+    ; CHECK: SU(2): dead %2:gpr32 = ANDWrs %0:gpr32, undef $w3, 0
+    ; CHECK: Predecessors:
+    ; CHECK: SU(0): Ord  Latency=0 Cluster
+    ; CHECK: SU(3): dead %3:gpr64 = ORRXrr %1:gpr64, undef $x4
+    ; CHECK: Predecessors:
+    ; CHECK: SU(1): Ord  Latency=0 Cluster
+...
+---
+name: nope
+body: |
+  bb.0.entry:
+    ; Shifted register.
+    %0:gpr32 = SUBWrr undef $w0, undef $w1
+    %1:gpr32 = SUBWrs %0, undef $w2, 1
+    ; CHECK: SU(0): %0:gpr32 = SUBWrr undef $w0, undef $w1
+    ; CHECK: Successors:
+    ; CHECK-NOT: SU(1): Ord  Latency=0 Cluster
+    ; CHECK: SU(1): dead %1:gpr32 = SUBWrs %0:gpr32, undef $w2, 1
+
+    ; Multiple successors.
+    %2:gpr64 = ADDXrr undef $x0, undef $x1
+    %3:gpr32 = EXTRACT_SUBREG %2, %subreg.sub_32
+    %4:gpr32 = ANDWrs %3, undef $w2, 0
+    %5:gpr64 = ADDSXrr %2, undef $x3, implicit-def $nzcv
+    ; CHECK: SU(2): %2:gpr64 = ADDXrr undef $x0, undef $x1
+    ; CHECK: Successors:
+    ; CHECK-NOT: SU(3): Ord  Latency=0 Cluster
+    ; CHECK: SU(5): Ord  Latency=0 Cluster
+    ; CHECK: SU(3): %3:gpr32 = EXTRACT_SUBREG %2:gpr64, %subreg.sub_32
+    ; CHECK: SU(5): dead %5:gpr64 = ADDSXrr %2:gpr64, undef $x3, implicit-def $nzcv
+
+    ; Different register sizes.
+    %6:gpr32 = SUBWrr undef $w0, undef $w1
+    %7:gpr64 = ADDXrr undef $x1, undef $x2
+    %8:gpr64 = SUBXrr %7, undef $x3
+    %9:gpr32 = ADDWrr %6, undef $w4
+    ; CHECK: SU(6): %6:gpr32 = SUBWrr undef $w0, undef $w1
+    ; CHECK: Successors:
+    ; CHECK-NOT: SU(8): Ord  Latency=0 Cluster
+    ; CHECK: SU(7): %7:gpr64 = ADDXrr undef $x1, undef $x2
+    ; CHECK: Successors:
+    ; CHECK-NOT: SU(9): Ord  Latency=0 Cluster
+    ; CHECK: SU(8): dead %8:gpr64 = SUBXrr %7:gpr64, undef $x3
+    ; CHECK: Predecessors:
+    ; CHECK: SU(7): Ord  Latency=0 Cluster
+    ; CHECK: SU(9): dead %9:gpr32 = ADDWrr %6:gpr32, undef $w4
+    ; CHECK: Predecessors:
+    ; CHECK: SU(6): Ord  Latency=0 Cluster
+...




More information about the llvm-commits mailing list