[llvm] 59f4b3d - [PowerPC] Implement more fusion types for Power10

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 23 01:24:17 PST 2021


Author: Qiu Chaofan
Date: 2021-11-23T17:21:17+08:00
New Revision: 59f4b3d3081535b61609f12ea5f638905616fcbc

URL: https://github.com/llvm/llvm-project/commit/59f4b3d3081535b61609f12ea5f638905616fcbc
DIFF: https://github.com/llvm/llvm-project/commit/59f4b3d3081535b61609f12ea5f638905616fcbc.diff

LOG: [PowerPC] Implement more fusion types for Power10

This implements the rest of the Power10 instruction fusion pairs, according
to the user manual, including 'wide immediate', 'load compare', 'zero move'
and 'SHA3 assist'.

Only 'SHA3 assist' is enabled by default.

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D112912

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPC.td
    llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
    llvm/lib/Target/PowerPC/PPCMacroFusion.def
    llvm/lib/Target/PowerPC/PPCSubtarget.cpp
    llvm/lib/Target/PowerPC/PPCSubtarget.h
    llvm/test/CodeGen/PowerPC/macro-fusion.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index a1ff20bb36121..422bd11dca52e 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -203,6 +203,22 @@ def FeatureLogicalFusion :
   SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true",
                    "Target supports Logical Operations fusion",
                    [FeatureFusion]>;
+def FeatureSha3Fusion :
+  SubtargetFeature<"fuse-sha3", "HasSha3Fusion", "true",
+                   "Target supports SHA3 assist fusion",
+                   [FeatureFusion]>;
+def FeatureCompareFusion:
+  SubtargetFeature<"fuse-cmp", "HasCompareFusion", "true",
+                   "Target supports Comparison Operations fusion",
+                   [FeatureFusion]>;
+def FeatureWideImmFusion:
+  SubtargetFeature<"fuse-wideimm", "HasWideImmFusion", "true",
+                   "Target supports Wide-Immediate fusion",
+                   [FeatureFusion]>;
+def FeatureZeroMoveFusion:
+  SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true",
+                   "Target supports move to SPR with branch fusion",
+                   [FeatureFusion]>;
 def FeatureUnalignedFloats :
   SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
                    "true", "CPU does not trap on unaligned FP access">;
@@ -393,7 +409,7 @@ def ProcessorFeatures {
   // still exist with the exception of those we know are Power9 specific.
   list<SubtargetFeature> FusionFeatures = [
     FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion,
-    FeatureLogicalFusion, FeatureArithAddFusion
+    FeatureLogicalFusion, FeatureArithAddFusion, FeatureSha3Fusion,
   ];
   list<SubtargetFeature> P10AdditionalFeatures =
     !listconcat(FusionFeatures, [

diff  --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index bdff5109c1e13..9d5206f8fd43c 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -149,6 +149,79 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
   case FusionFeature::FK_SldiAdd:
     return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
            (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
+
+  // rldicl rx, ra, 1, 0  - xor
+  case FusionFeature::FK_RotateLeftXor:
+    return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0);
+
+  // rldicr rx, ra, 1, 63 - xor
+  case FusionFeature::FK_RotateRightXor:
+    return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63);
+
+  // We actually use CMPW* and CMPD*; 'L' doesn't exist as an operand in instr.
+
+  // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+  // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+  case FusionFeature::FK_LoadCmp1:
+  // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+  // { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+  case FusionFeature::FK_LoadCmp2: {
+    const MachineOperand &BT = SecondMI.getOperand(0);
+    if (!BT.isReg() ||
+        (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+      return false;
+    if (SecondMI.getOpcode() == PPC::CMPDI &&
+        matchingImmOps(SecondMI, 2, -1, 16))
+      return true;
+    return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1);
+  }
+
+  // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+  case FusionFeature::FK_LoadCmp3: {
+    const MachineOperand &BT = SecondMI.getOperand(0);
+    if (!BT.isReg() ||
+        (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+      return false;
+    return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) ||
+           matchingImmOps(SecondMI, 2, -1, 16);
+  }
+
+  // mtctr - { bcctr,bcctrl }
+  case FusionFeature::FK_ZeroMoveCTR:
+    // ( mtctr rx ) is alias of ( mtspr 9, rx )
+    return (FirstMI.getOpcode() != PPC::MTSPR &&
+            FirstMI.getOpcode() != PPC::MTSPR8) ||
+           matchingImmOps(FirstMI, 0, 9);
+
+  // mtlr - { bclr,bclrl }
+  case FusionFeature::FK_ZeroMoveLR:
+    // ( mtlr rx ) is alias of ( mtspr 8, rx )
+    return (FirstMI.getOpcode() != PPC::MTSPR &&
+            FirstMI.getOpcode() != PPC::MTSPR8) ||
+           matchingImmOps(FirstMI, 0, 8);
+
+  // addis rx,ra,si - addi rt,rx,SI, SI >= 0
+  case FusionFeature::FK_AddisAddi: {
+    const MachineOperand &RA = FirstMI.getOperand(1);
+    const MachineOperand &SI = SecondMI.getOperand(2);
+    if (!SI.isImm() || !RA.isReg())
+      return false;
+    if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+      return false;
+    return SignExtend64(SI.getImm(), 16) >= 0;
+  }
+
+  // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+  case FusionFeature::FK_AddiAddis: {
+    const MachineOperand &RA = FirstMI.getOperand(1);
+    const MachineOperand &SI = FirstMI.getOperand(2);
+    if (!SI.isImm() || !RA.isReg())
+      return false;
+    if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+      return false;
+    int64_t ExtendedSI = SignExtend64(SI.getImm(), 16);
+    return ExtendedSI >= 2;
+  }
   }
 
   llvm_unreachable("All the cases should have been handled");

diff  --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index 469a24800423a..e4954b722fd0f 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -78,5 +78,80 @@ FUSION_FEATURE(VecLogical, hasLogicalFusion, -1,
 FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32),
                FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
 
+// rldicl rx, ra, 1, 0  - xor
+FUSION_FEATURE(RotateLeftXor, hasSha3Fusion, 1,
+               FUSION_OP_SET(RLDICL, RLDICL_32, RLDICL_32_64),
+               FUSION_OP_SET(XOR, XOR8))
+
+// rldicr rx, ra, 1, 63 - xor
+FUSION_FEATURE(RotateRightXor, hasSha3Fusion, 1,
+               FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(XOR, XOR8))
+
+// There are two special cases in the 'load-compare' series, so we have to split
+// them into several pattern groups to fit into the current framework. This can
+// be made clearer once we switch to a more expressive approach.
+
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+FUSION_FEATURE(LoadCmp1, hasCompareFusion, 1,
+               FUSION_OP_SET(LBZ, LBZ8, LBZX, LBZX8, LBZXTLS, LBZXTLS_,
+                             LBZXTLS_32, LHZ, LHZ8, LHZX, LHZX8, LHZXTLS,
+                             LHZXTLS_, LHZXTLS_32, LWZ, LWZ8, LWZX, LWZX8,
+                             LWZXTLS, LWZXTLS_, LWZXTLS_32),
+               FUSION_OP_SET(CMPDI, CMPLDI, CMPLWI))
+
+// { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+FUSION_FEATURE(LoadCmp2, hasCompareFusion, 1,
+               FUSION_OP_SET(LD, LDX, LDXTLS, LDXTLS_),
+               FUSION_OP_SET(CMPDI, CMPLDI))
+
+// { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+FUSION_FEATURE(LoadCmp3, hasCompareFusion, 1,
+               FUSION_OP_SET(LHA, LHA8, LHAX, LHAX8, LWA, LWA_32, LWAX,
+                             LWAX_32),
+               FUSION_OP_SET(CMPLDI, CMPLWI))
+
+// ori - oris
+FUSION_FEATURE(OriOris, hasWideImmFusion, 1, FUSION_OP_SET(ORI, ORI8),
+               FUSION_OP_SET(ORIS, ORIS8))
+
+// lis - ori
+FUSION_FEATURE(LisOri, hasWideImmFusion, 1, FUSION_OP_SET(LIS, LIS8),
+               FUSION_OP_SET(ORI, ORI8))
+
+// oris - ori
+FUSION_FEATURE(OrisOri, hasWideImmFusion, 1, FUSION_OP_SET(ORIS, ORIS8),
+               FUSION_OP_SET(ORI, ORI8))
+
+// xori - xoris
+FUSION_FEATURE(XoriXoris, hasWideImmFusion, 1, FUSION_OP_SET(XORI, XORI8),
+               FUSION_OP_SET(XORIS, XORIS8))
+
+// xoris - xori
+FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
+               FUSION_OP_SET(XORI, XORI8))
+
+// addis rx,ra,si - addi rt,rx,SI, SI >= 0
+FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
+               FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL))
+
+// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL),
+               FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
+
+// mtctr - { bcctr,bcctrl }
+FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1,
+               FUSION_OP_SET(MTCTR, MTCTRloop, MTSPR8, MTSPR),
+               FUSION_OP_SET(BCCTR, BCCTRn, BCCTR8, BCCTR8n, BCCTRL, BCCTRLn,
+                             BCCTRL8, BCCTRL8n, gBCCTR, gBCCTRL))
+
+// mtlr - { bclr,bclrl }
+FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1,
+               FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR),
+               FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL))
+
 #undef FUSION_FEATURE
 #undef FUSION_OP_SET

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index dfc29dbb10f19..1258a1281597a 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -131,6 +131,10 @@ void PPCSubtarget::initializeEnvironment() {
   HasAddLogicalFusion = false;
   HasLogicalAddFusion = false;
   HasLogicalFusion = false;
+  HasSha3Fusion = false;
+  HasCompareFusion = false;
+  HasWideImmFusion = false;
+  HasZeroMoveFusion = false;
   IsISA2_06 = false;
   IsISA2_07 = false;
   IsISA3_0 = false;

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 783ea121ccb83..d52833cb1465d 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -151,6 +151,10 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool HasAddLogicalFusion;
   bool HasLogicalAddFusion;
   bool HasLogicalFusion;
+  bool HasSha3Fusion;
+  bool HasCompareFusion;
+  bool HasWideImmFusion;
+  bool HasZeroMoveFusion;
   bool IsISA2_06;
   bool IsISA2_07;
   bool IsISA3_0;
@@ -340,6 +344,10 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool hasAddLogicalFusion() const { return HasAddLogicalFusion; }
   bool hasLogicalAddFusion() const { return HasLogicalAddFusion; }
   bool hasLogicalFusion() const { return HasLogicalFusion; }
+  bool hasCompareFusion() const { return HasCompareFusion; }
+  bool hasWideImmFusion() const { return HasWideImmFusion; }
+  bool hasSha3Fusion() const { return HasSha3Fusion; }
+  bool hasZeroMoveFusion() const { return HasZeroMoveFusion; }
   bool needsSwapsForVSXMemOps() const {
     return hasVSX() && isLittleEndian() && !hasP9Vector();
   }

diff  --git a/llvm/test/CodeGen/PowerPC/macro-fusion.mir b/llvm/test/CodeGen/PowerPC/macro-fusion.mir
index 16391a2ab8fa2..91c435d290ffc 100644
--- a/llvm/test/CodeGen/PowerPC/macro-fusion.mir
+++ b/llvm/test/CodeGen/PowerPC/macro-fusion.mir
@@ -1,6 +1,7 @@
 # REQUIRES: asserts
 # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -x=mir < %s \
 # RUN:   -debug-only=machine-scheduler -start-before=postmisched 2>&1 \
+# RUN:   -mattr=+fuse-zeromove,+fuse-cmp,+fuse-wideimm \
 # RUN:   | FileCheck %s
 
 # CHECK: add_mulld:%bb.0
@@ -93,3 +94,55 @@ body:             |
     renamable $x3 = ADD8 killed renamable $x4, $x5
     BLR8 implicit $lr8, implicit $rm, implicit $x3
 ...
+
+# CHECK: rldicl_xor:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) /  RLDICL - XOR8
+---
+name: rldicl_xor
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x4 = RLDICL $x3, 1, 0
+    renamable $x3 = XOR8 killed renamable $x4, $x5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: rldicr_xor:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) /  RLDICR - XOR8
+---
+name: rldicr_xor
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x4 = RLDICR $x3, 1, 63
+    renamable $x3 = XOR8 killed renamable $x4, $x5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: ori_oris:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) /  ORI8 - ORIS8
+---
+name: ori_oris
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x3, $x4
+    renamable $x4 = ORI8 $x3, 63
+    renamable $x3 = ORIS8 killed renamable $x4, 20
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: load_cmp:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) /  LD - CMPDI
+---
+name: load_cmp
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x3 = LD 0, killed renamable $x3
+    renamable $cr0 = CMPDI killed renamable $x3, 0
+    renamable $x3 = ISEL8 killed renamable $x5, killed renamable $x4, renamable $cr0lt, implicit killed $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit $x3


        


More information about the llvm-commits mailing list