[llvm-branch-commits] [llvm] 4db1cf5 - X86InstrInfo: Optimize more combinations of SUB+CMP

Matthias Braun via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Oct 28 10:33:19 PDT 2021


Author: Matthias Braun
Date: 2021-10-28T10:23:28-07:00
New Revision: 4db1cf53740a8770b51ac1c03e8e9cc5e66eb83d

URL: https://github.com/llvm/llvm-project/commit/4db1cf53740a8770b51ac1c03e8e9cc5e66eb83d
DIFF: https://github.com/llvm/llvm-project/commit/4db1cf53740a8770b51ac1c03e8e9cc5e66eb83d.diff

LOG: X86InstrInfo: Optimize more combinations of SUB+CMP

`X86InstrInfo::optimizeCompareInstr` would previously only optimize a
`CMP` preceded by a `SUB` with matching operands, the one pattern
recognized by `isRedundantFlagInstr`. This extends the code to also
recognize other combinations such as `CMP`+`CMP`, `TEST`+`TEST`, and
`SUB x, 0`+`TEST x, x`.
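
Two condensed examples of what is now caught, modeled on the MIR tests
updated below (a sketch, not verbatim pass output):

    ; Identical comparison: the second CMP is removed outright.
    CMP32rr %0, %1, implicit-def $eflags
    $cl = SETCCr 2, implicit $eflags       ; SETB
    CMP32rr %0, %1, implicit-def $eflags   ; <-- removed
    $bl = SETCCr 2, implicit $eflags       ; reuses the first CMP's flags

    ; Swapped operands: removable after flipping the consumer's
    ; condition code, since x < y is y > x (SETB becomes SETA).
    CMP32rr %0, %1, implicit-def $eflags
    CMP32rr %1, %0, implicit-def $eflags   ; <-- removed
    $bl = SETCCr 2, implicit $eflags       ; rewritten to SETCCr 7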

- Change `isRedundantFlagInstr` to run `analyzeCompare` on the
  candidate instruction and compare the results. This normalizes things
  and gives consistent results across the comparison forms (`CMP x, y`,
  `SUB x, y`) and the compare-with-immediate cases (`TEST x, x`,
  `SUB x, 0`, `CMP x, 0`...); see the sketch after this list.
- Turn `isRedundantFlagInstr` into a member function so it can call
  `analyzeCompare`.
- Also check `isRedundantFlagInstr` even if `IsCmpZero` is true, since
  we now have cases like `TEST`+`TEST`.
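
A sketch of the normalization from the first bullet, condensed from the
MIR tests updated below: `analyzeCompare` reduces `TEST x, x`,
`SUB x, 0` and `CMP x, 0` to the same register/mask/value triple, so
either instruction can make the other redundant:

    CMP32ri8 %0, 0, implicit-def $eflags
    $cl = SETCCr 2, implicit $eflags
    TEST32rr %0, %0, implicit-def $eflags  ; same flags as CMP %0, 0: removed
    $bl = SETCCr 2, implicit $eflags       ; reuses the CMP's flags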

Differential Revision: https://reviews.llvm.org/D110865

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/lib/Target/X86/X86InstrInfo.h
    llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
    llvm/test/CodeGen/X86/optimize-compare.mir
    llvm/test/CodeGen/X86/postalloc-coalescing.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 96180e2b2bf2..81e7184fa07a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4011,42 +4011,72 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   return false;
 }
 
-/// Check whether the first instruction, whose only
-/// purpose is to update flags, can be made redundant.
-/// CMPrr can be made redundant by SUBrr if the operands are the same.
-/// This function can be extended later on.
-/// SrcReg, SrcRegs: register operands for FlagI.
-/// ImmValue: immediate for FlagI if it takes an immediate.
-inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
+bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
                                         Register SrcReg, Register SrcReg2,
                                         int64_t ImmMask, int64_t ImmValue,
-                                        const MachineInstr &OI) {
-  if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
-       (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
-       (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
-       (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
-      ((OI.getOperand(1).getReg() == SrcReg &&
-        OI.getOperand(2).getReg() == SrcReg2) ||
-       (OI.getOperand(1).getReg() == SrcReg2 &&
-        OI.getOperand(2).getReg() == SrcReg)))
-    return true;
-
-  if (ImmMask != 0 &&
-      ((FlagI.getOpcode() == X86::CMP64ri32 &&
-        OI.getOpcode() == X86::SUB64ri32) ||
-       (FlagI.getOpcode() == X86::CMP64ri8 &&
-        OI.getOpcode() == X86::SUB64ri8) ||
-       (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
-       (FlagI.getOpcode() == X86::CMP32ri8 &&
-        OI.getOpcode() == X86::SUB32ri8) ||
-       (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
-       (FlagI.getOpcode() == X86::CMP16ri8 &&
-        OI.getOpcode() == X86::SUB16ri8) ||
-       (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
-      OI.getOperand(1).getReg() == SrcReg &&
-      OI.getOperand(2).getImm() == ImmValue)
-    return true;
-  return false;
+                                        const MachineInstr &OI,
+                                        bool *IsSwapped) const {
+  switch (OI.getOpcode()) {
+  case X86::CMP64rr:
+  case X86::CMP32rr:
+  case X86::CMP16rr:
+  case X86::CMP8rr:
+  case X86::SUB64rr:
+  case X86::SUB32rr:
+  case X86::SUB16rr:
+  case X86::SUB8rr: {
+    Register OISrcReg;
+    Register OISrcReg2;
+    int64_t OIMask;
+    int64_t OIValue;
+    if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
+        OIMask != ImmMask || OIValue != ImmValue)
+      return false;
+    if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
+      *IsSwapped = false;
+      return true;
+    }
+    if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
+      *IsSwapped = true;
+      return true;
+    }
+    return false;
+  }
+  case X86::CMP64ri32:
+  case X86::CMP64ri8:
+  case X86::CMP32ri:
+  case X86::CMP32ri8:
+  case X86::CMP16ri:
+  case X86::CMP16ri8:
+  case X86::CMP8ri:
+  case X86::SUB64ri32:
+  case X86::SUB64ri8:
+  case X86::SUB32ri:
+  case X86::SUB32ri8:
+  case X86::SUB16ri:
+  case X86::SUB16ri8:
+  case X86::SUB8ri:
+  case X86::TEST64rr:
+  case X86::TEST32rr:
+  case X86::TEST16rr:
+  case X86::TEST8rr: {
+    if (ImmMask != 0) {
+      Register OISrcReg;
+      Register OISrcReg2;
+      int64_t OIMask;
+      int64_t OIValue;
+      if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
+          SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) {
+        assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister &&
+               "should not have 2nd register");
+        return true;
+      }
+    }
+    return FlagI.isIdenticalTo(OI);
+  }
+  default:
+    return false;
+  }
 }
 
 /// Check whether the definition can be converted
@@ -4275,12 +4305,19 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
 
   bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
 
+  // Transformation currently requires SSA values.
+  if (SrcReg2.isPhysical())
+    return false;
+  MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+  assert(SrcRegDef && "Must have a definition (SSA)");
+
   MachineInstr *MI = nullptr;
   MachineInstr *Sub = nullptr;
   MachineInstr *Movr0Inst = nullptr;
   bool NoSignFlag = false;
   bool ClearsOverflowFlag = false;
   bool ShouldUpdateCC = false;
+  bool IsSwapped = false;
   X86::CondCode NewCC = X86::COND_INVALID;
 
   // Search backward from CmpInstr for the next instruction defining EFLAGS.
@@ -4288,8 +4325,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
   MachineBasicBlock::reverse_iterator From =
       std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
-  MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
-  assert(SrcRegDef && "Must have a definition (SSA)");
   for (MachineBasicBlock *MBB = &CmpMBB;;) {
     for (MachineInstr &Inst : make_range(From, MBB->rend())) {
       // Try to use EFLAGS from the instruction defining %SrcReg. Example:
@@ -4329,8 +4364,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
         //     sub x, y  or  cmp x, y
         //     ...           // EFLAGS not changed
         //     cmp x, y      // <-- can be removed
-        if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
-                                               CmpMask, CmpValue, Inst)) {
+        if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
+                                 Inst, &IsSwapped)) {
           Sub = &Inst;
           break;
         }
@@ -4360,10 +4395,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     From = MBB->rbegin();
   }
 
-  bool IsSwapped =
-      (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
-       Sub->getOperand(2).getReg() == SrcReg);
-
   // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
   // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
   // If we are done with the basic block, we need to check whether EFLAGS is
@@ -4386,7 +4417,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
 
     // EFLAGS is used by this instruction.
     X86::CondCode OldCC = X86::COND_INVALID;
-    if (IsCmpZero || IsSwapped) {
+    if (MI || IsSwapped) {
       // We decode the condition code from opcode.
       if (Instr.isBranch())
         OldCC = X86::getCondFromBranch(Instr);
@@ -4398,7 +4429,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       if (OldCC == X86::COND_INVALID) return false;
     }
     X86::CondCode ReplacementCC = X86::COND_INVALID;
-    if (IsCmpZero) {
+    if (MI) {
       switch (OldCC) {
       default: break;
       case X86::COND_A: case X86::COND_AE:
@@ -4456,14 +4487,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
 
   // If EFLAGS is not killed nor re-defined, we should check whether it is
   // live-out. If it is live-out, do not optimize.
-  if ((IsCmpZero || IsSwapped) && !IsSafe) {
+  if ((MI || IsSwapped) && !IsSafe) {
     for (MachineBasicBlock *Successor : CmpMBB.successors())
       if (Successor->isLiveIn(X86::EFLAGS))
         return false;
   }
 
   // The instruction to be updated is either Sub or MI.
-  Sub = IsCmpZero ? MI : Sub;
+  assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
+  Sub = MI != nullptr ? MI : Sub;
   MachineBasicBlock *SubBB = Sub->getParent();
   // Move Movr0Inst to the appropriate place before Sub.
   if (Movr0Inst) {

diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 624a03a76a34..4b5d6e8886c3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -626,6 +626,21 @@ class X86InstrInfo final : public X86GenInstrInfo {
                                      unsigned &SrcOpIdx1,
                                      unsigned &SrcOpIdx2,
                                      bool IsIntrinsic = false) const;
+
+  /// Returns true when instruction \p FlagI produces the same flags as \p OI.
+  /// The caller should pass in the results of calling analyzeCompare on \p OI:
+  /// \p SrcReg, \p SrcReg2, \p ImmMask, \p ImmValue.
+  /// If the flags match \p OI as if it had the input operands swapped then the
+  /// function succeeds and sets \p IsSwapped to true.
+  ///
+  /// Examples of OI, FlagI pairs returning true:
+  ///   CMP %1, 42   and  CMP %1, 42
+  ///   CMP %1, %2   and  %3 = SUB %1, %2
+  ///   TEST %1, %1  and  %2 = SUB %1, 0
+  ///   CMP %1, %2   and  %3 = SUB %2, %1  ; IsSwapped=true
+  bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
+                            Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
+                            const MachineInstr &OI, bool *IsSwapped) const;
 };
 
 } // namespace llvm

diff --git a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
index 010b9f4bea48..05baa14cde7b 100644
--- a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
+++ b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
@@ -67,7 +67,6 @@ define i16 @main_bb_2E_i9_2E_i_2E_i932_2E_ce(%struct.list* %l_addr.01.0.i2.i.i92
 ; CHECK-NEXT:  LBB0_6: ## %NodeBlock
 ; CHECK-NEXT:    js LBB0_9
 ; CHECK-NEXT:  ## %bb.7: ## %LeafBlock1
-; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    jne LBB0_3
 ; CHECK-NEXT:  ## %bb.8: ## %bb12.i.i935.exitStub
 ; CHECK-NEXT:    movl %edi, (%esi)

diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir
index 5d0ffd542f89..dc15cbbab8b1 100644
--- a/llvm/test/CodeGen/X86/optimize-compare.mir
+++ b/llvm/test/CodeGen/X86/optimize-compare.mir
@@ -212,8 +212,7 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
     ; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
-    ; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
-    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
     %0:gr32 = COPY $esi
     %1:gr32 = COPY $edi
     CMP32rr %0, %1, implicit-def $eflags
@@ -231,7 +230,6 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
     ; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
-    ; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
     ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
     %0:gr64 = COPY $rsi
     %1:gr64 = COPY $rdi
@@ -249,7 +247,6 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $ax
     ; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
-    ; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
     ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
     %0:gr16 = COPY $ax
     TEST16rr %0, %0, implicit-def $eflags
@@ -267,8 +264,7 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
     ; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
-    ; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
-    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
     %0:gr32 = COPY $esi
     %1:gr32 = COPY $edi
     CMP32rr %0, %1, implicit-def $eflags
@@ -285,7 +281,6 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
     ; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
-    ; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
     ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
     %0:gr32 = COPY $esi
     CMP32ri %0, -12345, implicit-def $eflags
@@ -323,7 +318,6 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
     ; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
-    ; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
     ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
     %0:gr32 = COPY $esi
     CMP32ri8 %0, 0, implicit-def $eflags
@@ -340,7 +334,6 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
     ; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
-    ; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
     ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
     %0:gr32 = COPY $esi
     TEST32rr %0, %0, implicit-def $eflags
@@ -359,7 +352,6 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
     ; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 7, implicit $eflags
-    ; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
     ; CHECK-NEXT: $cl = SETCCr 3, implicit $eflags
     %0:gr64 = COPY $rsi
     CMP64ri32 %0, @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags

diff --git a/llvm/test/CodeGen/X86/postalloc-coalescing.ll b/llvm/test/CodeGen/X86/postalloc-coalescing.ll
index 7f190a342474..d0fb86836164 100644
--- a/llvm/test/CodeGen/X86/postalloc-coalescing.ll
+++ b/llvm/test/CodeGen/X86/postalloc-coalescing.ll
@@ -15,7 +15,6 @@ define fastcc i32 @_Z18yy_get_next_bufferv() nounwind {
 ; CHECK-NEXT:    jne .LBB0_1
 ; CHECK-NEXT:  .LBB0_3: # %bb158
 ; CHECK-NEXT:    movb %al, 0
-; CHECK-NEXT:    cmpl $-1, %eax
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retl
 entry:


        

