[PATCH] D142502: [AArch64] Add A+B+1 and A-B-1 macro fusion for Ampere1A

Philipp Tomsich via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 24 12:54:56 PST 2023


philipp.tomsich created this revision.
philipp.tomsich added a reviewer: dmgreen.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
philipp.tomsich requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

The Ampere1A core supports a new macro-fusion pattern that optimises the
A+B+1 and A-B-1 cases.  Add a subtarget feature and a fusion predicate for it.

Depends on D142396 <https://reviews.llvm.org/D142396>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D142502

Files:
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64MacroFusion.cpp


Index: llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -379,6 +379,49 @@
   return false;
 }
 
+/// Check for the "(A + B) + 1" / "(A - B) - 1" pattern: a register-register
+/// ADD/SUB followed by an immediate ADD/SUB (of the same kind) of 1. When
+/// FirstMI is null, only SecondMI is checked (FirstMI acts as a wildcard).
+static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
+                                        const MachineInstr &SecondMI) {
+  bool NeedsSubtract = false;
+
+  // The 2nd instr must be an add-immediate or subtract-immediate.
+  switch (SecondMI.getOpcode()) {
+  case AArch64::SUBWri:
+  case AArch64::SUBXri:
+    NeedsSubtract = true;
+    [[fallthrough]];
+  case AArch64::ADDWri:
+  case AArch64::ADDXri:
+    break;
+  default:
+    return false;
+  }
+
+  // ADD/SUB (immediate) encode an unsigned immediate, so "A - B - 1" is a
+  // SUB of +1 (not an immediate of -1): the constant must be 1 in both cases.
+  if (!SecondMI.getOperand(2).isImm() ||
+      SecondMI.getOperand(2).getImm() != 1)
+    return false;
+
+  // Assume the 1st instr to be a wildcard if it is unspecified.
+  if (FirstMI == nullptr)
+    return true;
+
+  // The 1st instr must be the register-register form of the same operation.
+  switch (FirstMI->getOpcode()) {
+  case AArch64::SUBWrr:
+  case AArch64::SUBXrr:
+    return NeedsSubtract;
+  case AArch64::ADDWrr:
+  case AArch64::ADDXrr:
+    return !NeedsSubtract;
+  default:
+    return false;
+  }
+}
+
 /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
@@ -411,6 +454,9 @@
     return true;
   if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
     return true;
+  if (ST.hasFuseeAddSub2RegAndConstOne() &&
+      isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
+    return true;
 
   return false;
 }
Index: llvm/lib/Target/AArch64/AArch64.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -289,6 +289,10 @@
     "fuse-literals", "HasFuseLiterals", "true",
     "CPU fuses literal generation operations">;
 
+def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
+    "fuse-addsub-2reg-const1", "HasFuseeAddSub2RegAndConstOne", "true",
+    "CPU fuses (a + b + 1) and (a - b - 1)">;
+
 def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
@@ -1306,12 +1310,12 @@
                                    FeatureNEON, FeaturePerfMon, FeatureSPE,
                                    FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
   list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
-                                    FeatureSSBS, FeatureRandGen, FeatureSB,
-                                    FeatureSHA2, FeatureSHA3, FeatureAES];
+                                    FeatureSSBS, FeatureRandGen, FeatureSHA2,
+                                    FeatureSHA3, FeatureAES];
   list<SubtargetFeature> Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
                                      FeatureMTE, FeatureSSBS, FeatureRandGen,
-                                     FeatureSB, FeatureSM4, FeatureSHA2,
-                                     FeatureSHA3, FeatureAES];
+                                     FeatureSM4, FeatureSHA2, FeatureSHA3,
+                                     FeatureAES];
 
   // ETE and TRBE are future architecture extensions. We temporarily enable them
   // by default for users targeting generic AArch64. The extensions do not


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D142502.491881.patch
Type: text/x-patch
Size: 3511 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230124/13acfe14/attachment-0001.bin>


More information about the llvm-commits mailing list