[llvm] f3dafd2 - Fix the macro fusion table for X86 according to Intel optimization

Shengchen Kan via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 4 22:40:34 PST 2019


Author: Shengchen Kan
Date: 2019-12-05T14:39:11+08:00
New Revision: f3dafd21a36f33f3829a4c81260ff5809c065111

URL: https://github.com/llvm/llvm-project/commit/f3dafd21a36f33f3829a4c81260ff5809c065111
DIFF: https://github.com/llvm/llvm-project/commit/f3dafd21a36f33f3829a4c81260ff5809c065111.diff

LOG: Fix the macro fusion table for X86 according to Intel optimization
manual and add function isMacroFused

Differential Revision: https://reviews.llvm.org/D70999

Added: 
    

Modified: 
    llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
    llvm/lib/Target/X86/X86MacroFusion.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 335127c6d064..0bb23b03685c 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -101,6 +101,248 @@ namespace X86 {
 
     COND_INVALID
   };
+
+  // Classification of the first instruction of a macro-fusion pair.
+  enum class FirstMacroFusionInstKind {
+    // TEST
+    Test,
+    // CMP
+    Cmp,
+    // AND
+    And,
+    // ADD, SUB
+    AddSub,
+    // INC, DEC
+    IncDec,
+    // Not valid as a first macro fusion instruction.
+    Invalid
+  };
+
+  enum class SecondMacroFusionInstKind {
+    // JA, JAE, JB, JBE and their aliases.
+    AB,
+    // JE, JNE, JL, JLE, JG, JGE and their aliases.
+    ELG,
+    // JS, JNS, JP, JNP, JO, JNO and their aliases.
+    SPO,
+    // Not a fusible jump.
+    Invalid,
+  };
+
+  /// classifyFirstOpcodeInMacroFusion - return the macro-fusion kind of
+  /// \p Opcode as a first instruction, or Invalid if it is not listed below.
+  inline FirstMacroFusionInstKind
+  classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      return FirstMacroFusionInstKind::Invalid;
+    // TEST
+    case X86::TEST16i16:
+    case X86::TEST16mr:
+    case X86::TEST16ri:
+    case X86::TEST16rr:
+    case X86::TEST32i32:
+    case X86::TEST32mr:
+    case X86::TEST32ri:
+    case X86::TEST32rr:
+    case X86::TEST64i32:
+    case X86::TEST64mr:
+    case X86::TEST64ri32:
+    case X86::TEST64rr:
+    case X86::TEST8i8:
+    case X86::TEST8mr:
+    case X86::TEST8ri:
+    case X86::TEST8rr:
+      return FirstMacroFusionInstKind::Test;
+    case X86::AND16i16:
+    case X86::AND16ri:
+    case X86::AND16ri8:
+    case X86::AND16rm:
+    case X86::AND16rr:
+    case X86::AND16rr_REV:
+    case X86::AND32i32:
+    case X86::AND32ri:
+    case X86::AND32ri8:
+    case X86::AND32rm:
+    case X86::AND32rr:
+    case X86::AND32rr_REV:
+    case X86::AND64i32:
+    case X86::AND64ri32:
+    case X86::AND64ri8:
+    case X86::AND64rm:
+    case X86::AND64rr:
+    case X86::AND64rr_REV:
+    case X86::AND8i8:
+    case X86::AND8ri:
+    case X86::AND8ri8:
+    case X86::AND8rm:
+    case X86::AND8rr:
+    case X86::AND8rr_REV:
+      return FirstMacroFusionInstKind::And;
+    // CMP
+    case X86::CMP16i16:
+    case X86::CMP16mr:
+    case X86::CMP16ri:
+    case X86::CMP16ri8:
+    case X86::CMP16rm:
+    case X86::CMP16rr:
+    case X86::CMP16rr_REV:
+    case X86::CMP32i32:
+    case X86::CMP32mr:
+    case X86::CMP32ri:
+    case X86::CMP32ri8:
+    case X86::CMP32rm:
+    case X86::CMP32rr:
+    case X86::CMP32rr_REV:
+    case X86::CMP64i32:
+    case X86::CMP64mr:
+    case X86::CMP64ri32:
+    case X86::CMP64ri8:
+    case X86::CMP64rm:
+    case X86::CMP64rr:
+    case X86::CMP64rr_REV:
+    case X86::CMP8i8:
+    case X86::CMP8mr:
+    case X86::CMP8ri:
+    case X86::CMP8ri8:
+    case X86::CMP8rm:
+    case X86::CMP8rr:
+    case X86::CMP8rr_REV:
+      return FirstMacroFusionInstKind::Cmp;
+    // ADD
+    case X86::ADD16i16:
+    case X86::ADD16ri:
+    case X86::ADD16ri8:
+    case X86::ADD16rm:
+    case X86::ADD16rr:
+    case X86::ADD16rr_REV:
+    case X86::ADD32i32:
+    case X86::ADD32ri:
+    case X86::ADD32ri8:
+    case X86::ADD32rm:
+    case X86::ADD32rr:
+    case X86::ADD32rr_REV:
+    case X86::ADD64i32:
+    case X86::ADD64ri32:
+    case X86::ADD64ri8:
+    case X86::ADD64rm:
+    case X86::ADD64rr:
+    case X86::ADD64rr_REV:
+    case X86::ADD8i8:
+    case X86::ADD8ri:
+    case X86::ADD8ri8:
+    case X86::ADD8rm:
+    case X86::ADD8rr:
+    case X86::ADD8rr_REV:
+    // SUB
+    case X86::SUB16i16:
+    case X86::SUB16ri:
+    case X86::SUB16ri8:
+    case X86::SUB16rm:
+    case X86::SUB16rr:
+    case X86::SUB16rr_REV:
+    case X86::SUB32i32:
+    case X86::SUB32ri:
+    case X86::SUB32ri8:
+    case X86::SUB32rm:
+    case X86::SUB32rr:
+    case X86::SUB32rr_REV:
+    case X86::SUB64i32:
+    case X86::SUB64ri32:
+    case X86::SUB64ri8:
+    case X86::SUB64rm:
+    case X86::SUB64rr:
+    case X86::SUB64rr_REV:
+    case X86::SUB8i8:
+    case X86::SUB8ri:
+    case X86::SUB8ri8:
+    case X86::SUB8rm:
+    case X86::SUB8rr:
+    case X86::SUB8rr_REV:
+      return FirstMacroFusionInstKind::AddSub;
+    // INC
+    case X86::INC16r:
+    case X86::INC16r_alt:
+    case X86::INC32r:
+    case X86::INC32r_alt:
+    case X86::INC64r:
+    case X86::INC8r:
+    // DEC
+    case X86::DEC16r:
+    case X86::DEC16r_alt:
+    case X86::DEC32r:
+    case X86::DEC32r_alt:
+    case X86::DEC64r:
+    case X86::DEC8r:
+      return FirstMacroFusionInstKind::IncDec;
+    }
+  }
+
+  /// classifySecondCondCodeInMacroFusion - return the macro-fusion kind of a
+  /// jump with condition code \p CC, or Invalid if \p CC is not listed below.
+  inline SecondMacroFusionInstKind
+  classifySecondCondCodeInMacroFusion(X86::CondCode CC) {
+    if (CC == X86::COND_INVALID)
+      return SecondMacroFusionInstKind::Invalid;
+
+    switch (CC) {
+    default:
+      return SecondMacroFusionInstKind::Invalid;
+    // JE,JZ
+    case X86::COND_E:
+    // JNE,JNZ
+    case X86::COND_NE:
+    // JL,JNGE
+    case X86::COND_L:
+    // JLE,JNG
+    case X86::COND_LE:
+    // JG,JNLE
+    case X86::COND_G:
+    // JGE,JNL
+    case X86::COND_GE:
+      return SecondMacroFusionInstKind::ELG;
+    // JB,JC
+    case X86::COND_B:
+    // JNA,JBE
+    case X86::COND_BE:
+    // JA,JNBE
+    case X86::COND_A:
+    // JAE,JNC,JNB
+    case X86::COND_AE:
+      return SecondMacroFusionInstKind::AB;
+    // JS
+    case X86::COND_S:
+    // JNS
+    case X86::COND_NS:
+    // JP,JPE
+    case X86::COND_P:
+    // JNP,JPO
+    case X86::COND_NP:
+    // JO
+    case X86::COND_O:
+    // JNO
+    case X86::COND_NO:
+      return SecondMacroFusionInstKind::SPO;
+    }
+  }
+
+  inline bool isMacroFused(FirstMacroFusionInstKind FirstKind,
+                           SecondMacroFusionInstKind SecondKind) {
+    switch (FirstKind) {
+    case X86::FirstMacroFusionInstKind::Test:
+    case X86::FirstMacroFusionInstKind::And:
+      return true; // SecondKind is not consulted, so true even for Invalid.
+    case X86::FirstMacroFusionInstKind::Cmp:
+    case X86::FirstMacroFusionInstKind::AddSub:
+      return SecondKind == X86::SecondMacroFusionInstKind::AB ||
+             SecondKind == X86::SecondMacroFusionInstKind::ELG;
+    case X86::FirstMacroFusionInstKind::IncDec:
+      return SecondKind == X86::SecondMacroFusionInstKind::ELG;
+    case X86::FirstMacroFusionInstKind::Invalid:
+      return false;
+    }
+    llvm_unreachable("unknown fusion type");
+  }
 } // end namespace X86;
 
 /// X86II - This namespace holds all of the target specific flags that

diff  --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp
index c6da4b09dd60..b19d1263e0c9 100644
--- a/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "X86MacroFusion.h"
 #include "X86Subtarget.h"
 #include "llvm/CodeGen/MacroFusion.h"
@@ -18,160 +19,13 @@
 
 using namespace llvm;
 
-namespace {
-
-// The classification for the first instruction.
-enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid };
-
-// The classification for the second instruction (jump).
-enum class JumpKind {
-  // JE, JL, JG and variants.
-  ELG,
-  // JA, JB and variants.
-  AB,
-  // JS, JP, JO and variants.
-  SPO,
-  // Not a fusable jump.
-  Invalid,
-};
-
-} // namespace
-
-static FirstInstrKind classifyFirst(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return FirstInstrKind::Invalid;
-  case X86::TEST8rr:
-  case X86::TEST16rr:
-  case X86::TEST32rr:
-  case X86::TEST64rr:
-  case X86::TEST8ri:
-  case X86::TEST16ri:
-  case X86::TEST32ri:
-  case X86::TEST64ri32:
-  case X86::TEST8mr:
-  case X86::TEST16mr:
-  case X86::TEST32mr:
-  case X86::TEST64mr:
-    return FirstInstrKind::Test;
-  case X86::AND16ri:
-  case X86::AND16ri8:
-  case X86::AND16rm:
-  case X86::AND16rr:
-  case X86::AND32ri:
-  case X86::AND32ri8:
-  case X86::AND32rm:
-  case X86::AND32rr:
-  case X86::AND64ri32:
-  case X86::AND64ri8:
-  case X86::AND64rm:
-  case X86::AND64rr:
-  case X86::AND8ri:
-  case X86::AND8rm:
-  case X86::AND8rr:
-    return FirstInstrKind::And;
-  case X86::CMP16ri:
-  case X86::CMP16ri8:
-  case X86::CMP16rm:
-  case X86::CMP16rr:
-  case X86::CMP16mr:
-  case X86::CMP32ri:
-  case X86::CMP32ri8:
-  case X86::CMP32rm:
-  case X86::CMP32rr:
-  case X86::CMP32mr:
-  case X86::CMP64ri32:
-  case X86::CMP64ri8:
-  case X86::CMP64rm:
-  case X86::CMP64rr:
-  case X86::CMP64mr:
-  case X86::CMP8ri:
-  case X86::CMP8rm:
-  case X86::CMP8rr:
-  case X86::CMP8mr:
-    return FirstInstrKind::Cmp;
-  case X86::ADD16ri:
-  case X86::ADD16ri8:
-  case X86::ADD16ri8_DB:
-  case X86::ADD16ri_DB:
-  case X86::ADD16rm:
-  case X86::ADD16rr:
-  case X86::ADD16rr_DB:
-  case X86::ADD32ri:
-  case X86::ADD32ri8:
-  case X86::ADD32ri8_DB:
-  case X86::ADD32ri_DB:
-  case X86::ADD32rm:
-  case X86::ADD32rr:
-  case X86::ADD32rr_DB:
-  case X86::ADD64ri32:
-  case X86::ADD64ri32_DB:
-  case X86::ADD64ri8:
-  case X86::ADD64ri8_DB:
-  case X86::ADD64rm:
-  case X86::ADD64rr:
-  case X86::ADD64rr_DB:
-  case X86::ADD8ri:
-  case X86::ADD8ri_DB:
-  case X86::ADD8rm:
-  case X86::ADD8rr:
-  case X86::ADD8rr_DB:
-  case X86::SUB16ri:
-  case X86::SUB16ri8:
-  case X86::SUB16rm:
-  case X86::SUB16rr:
-  case X86::SUB32ri:
-  case X86::SUB32ri8:
-  case X86::SUB32rm:
-  case X86::SUB32rr:
-  case X86::SUB64ri32:
-  case X86::SUB64ri8:
-  case X86::SUB64rm:
-  case X86::SUB64rr:
-  case X86::SUB8ri:
-  case X86::SUB8rm:
-  case X86::SUB8rr:
-    return FirstInstrKind::ALU;
-  case X86::INC16r:
-  case X86::INC32r:
-  case X86::INC64r:
-  case X86::INC8r:
-  case X86::DEC16r:
-  case X86::DEC32r:
-  case X86::DEC64r:
-  case X86::DEC8r:
-    return FirstInstrKind::IncDec;
-  }
+static X86::FirstMacroFusionInstKind classifyFirst(const MachineInstr &MI) {
+  return X86::classifyFirstOpcodeInMacroFusion(MI.getOpcode());
 }
 
-static JumpKind classifySecond(const MachineInstr &MI) {
+static X86::SecondMacroFusionInstKind classifySecond(const MachineInstr &MI) {
   X86::CondCode CC = X86::getCondFromBranch(MI);
-  if (CC == X86::COND_INVALID)
-    return JumpKind::Invalid;
-
-  switch (CC) {
-  default:
-    return JumpKind::Invalid;
-  case X86::COND_E:
-  case X86::COND_NE:
-  case X86::COND_L:
-  case X86::COND_LE:
-  case X86::COND_G:
-  case X86::COND_GE:
-    return JumpKind::ELG;
-  case X86::COND_B:
-  case X86::COND_BE:
-  case X86::COND_A:
-  case X86::COND_AE:
-    return JumpKind::AB;
-  case X86::COND_S:
-  case X86::COND_NS:
-  case X86::COND_P:
-  case X86::COND_NP:
-  case X86::COND_O:
-  case X86::COND_NO:
-    return JumpKind::SPO;
-  }
+  return X86::classifySecondCondCodeInMacroFusion(CC);
 }
 
 /// Check if the instr pair, FirstMI and SecondMI, should be fused
@@ -187,40 +41,27 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   if (!(ST.hasBranchFusion() || ST.hasMacroFusion()))
     return false;
 
-  const JumpKind BranchKind = classifySecond(SecondMI);
+  const X86::SecondMacroFusionInstKind BranchKind = classifySecond(SecondMI);
 
-  if (BranchKind == JumpKind::Invalid)
+  if (BranchKind == X86::SecondMacroFusionInstKind::Invalid)
     return false; // Second cannot be fused with anything.
 
   if (FirstMI == nullptr)
     return true; // We're only checking whether Second can be fused at all.
 
-  const FirstInstrKind TestKind = classifyFirst(*FirstMI);
+  const X86::FirstMacroFusionInstKind TestKind = classifyFirst(*FirstMI);
 
   if (ST.hasBranchFusion()) {
     // Branch fusion can merge CMP and TEST with all conditional jumps.
-    return (TestKind == FirstInstrKind::Cmp ||
-            TestKind == FirstInstrKind::Test);
+    return (TestKind == X86::FirstMacroFusionInstKind::Cmp ||
+            TestKind == X86::FirstMacroFusionInstKind::Test);
   }
 
   if (ST.hasMacroFusion()) {
-    // Macro Fusion rules are a bit more complex. See Agner Fog's
-    // Microarchitecture table 9.2 "Instruction Fusion".
-    switch (TestKind) {
-    case FirstInstrKind::Test:
-    case FirstInstrKind::And:
-      return true;
-    case FirstInstrKind::Cmp:
-    case FirstInstrKind::ALU:
-      return BranchKind == JumpKind::ELG || BranchKind == JumpKind::AB;
-    case FirstInstrKind::IncDec:
-      return BranchKind == JumpKind::ELG;
-    case FirstInstrKind::Invalid:
-      return false;
-    }
+    return X86::isMacroFused(TestKind, BranchKind);
   }
 
-  llvm_unreachable("unknown branch fusion type");
+  llvm_unreachable("unknown fusion type");
 }
 
 namespace llvm {


        


More information about the llvm-commits mailing list