[llvm] 683994c - X86InstrInfo: Refactor and cleanup optimizeCompareInstr

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 24 16:43:58 PDT 2021


Author: Matthias Braun
Date: 2021-10-24T16:22:45-07:00
New Revision: 683994c863b8b8a46372408773e4bd65502c5193

URL: https://github.com/llvm/llvm-project/commit/683994c863b8b8a46372408773e4bd65502c5193
DIFF: https://github.com/llvm/llvm-project/commit/683994c863b8b8a46372408773e4bd65502c5193.diff

LOG: X86InstrInfo: Refactor and cleanup optimizeCompareInstr

This changes the first part of `optimizeCompareInstr` being split into a
loop with a forward scan for cases that re-use zero flags from a
producer in case of compare with zero and a backward scan for finding an
instruction equivalent to a compare.

The code now uses a single backward scan searching for the next
instructions that reads or writes EFLAGS.

Also:
- Add comments giving examples for the 3 cases handled.
- Check `MI` which contains the result of the zero-compare cases,
  instead of re-checking `IsCmpZero`.
- Tweak coding style in some loops.
- Add new MIR based tests that test the optimization in isolation.

This also removes a check for flag readers in situations like this:
```
= SUB32rr %0, %1, implicit-def $eflags
...  we no longer stop when there are $eflag users here
CMP32rr %0, %1   ; will be removed
...
```

Differential Revision: https://reviews.llvm.org/D110857

Added: 
    llvm/test/CodeGen/X86/optimize-compare.mir

Modified: 
    llvm/lib/Target/X86/X86InstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 3526955f2814..cc2d425eadd9 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4275,93 +4275,84 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   }
   }
 
-  // Get the unique definition of SrcReg.
-  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
-  if (!MI) return false;
+  // The following code tries to remove the comparison by re-using EFLAGS
+  // from earlier instructions.
 
-  // CmpInstr is the first instruction of the BB.
-  MachineBasicBlock::iterator I = CmpInstr, Def = MI;
-
-  // If we are comparing against zero, check whether we can use MI to update
-  // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
   bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
-  if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
-    return false;
 
-  // If we have a use of the source register between the def and our compare
-  // instruction we can eliminate the compare iff the use sets EFLAGS in the
-  // right way.
-  bool ShouldUpdateCC = false;
+  MachineInstr *MI = nullptr;
+  MachineInstr *Sub = nullptr;
+  MachineInstr *Movr0Inst = nullptr;
   bool NoSignFlag = false;
   bool ClearsOverflowFlag = false;
+  bool ShouldUpdateCC = false;
   X86::CondCode NewCC = X86::COND_INVALID;
-  if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
-    // Scan forward from the use until we hit the use we're looking for or the
-    // compare instruction.
-    for (MachineBasicBlock::iterator J = MI;; ++J) {
-      // Do we have a convertible instruction?
-      NewCC = isUseDefConvertible(*J);
-      if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
-          J->getOperand(1).getReg() == SrcReg) {
-        assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
-        ShouldUpdateCC = true; // Update CC later on.
-        // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
-        // with the new def.
-        Def = J;
-        MI = &*Def;
+
+  // Search backward from CmpInstr for the next instruction defining EFLAGS.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
+  MachineBasicBlock::reverse_iterator From =
+      std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
+  MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+  assert(SrcRegDef && "Must have a definition (SSA)");
+  for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
+    // Try to use EFLAGS from the instruction defining %SrcReg. Example:
+    //     %eax = addl ...
+    //     ...                // EFLAGS not changed
+    //     testl %eax, %eax   // <-- can be removed
+    if (&Inst == SrcRegDef) {
+      if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
+        MI = &Inst;
         break;
       }
-
-      if (J == I)
-        return false;
+      // Cannot find other candidates before definition of SrcReg.
+      return false;
     }
-  }
 
-  // We are searching for an earlier instruction that can make CmpInstr
-  // redundant and that instruction will be saved in Sub.
-  MachineInstr *Sub = nullptr;
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
-
-  // We iterate backward, starting from the instruction before CmpInstr and
-  // stop when reaching the definition of a source register or done with the BB.
-  // RI points to the instruction before CmpInstr.
-  // If the definition is in this basic block, RE points to the definition;
-  // otherwise, RE is the rend of the basic block.
-  MachineBasicBlock::reverse_iterator
-      RI = ++I.getReverse(),
-      RE = CmpInstr.getParent() == MI->getParent()
-               ? Def.getReverse() /* points to MI */
-               : CmpInstr.getParent()->rend();
-  MachineInstr *Movr0Inst = nullptr;
-  for (; RI != RE; ++RI) {
-    MachineInstr &Instr = *RI;
-    // Check whether CmpInstr can be made redundant by the current instruction.
-    if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
-                                           CmpValue, Instr)) {
-      Sub = &Instr;
-      break;
-    }
+    if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
+      // Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
+      //      %eax = ...
+      //      ...
+      //      popcntl %eax
+      //      ...                 // EFLAGS not changed
+      //      testl %eax, %eax    // <-- can be removed
+      if (IsCmpZero) {
+        NewCC = isUseDefConvertible(Inst);
+        if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
+            Inst.getOperand(1).getReg() == SrcReg) {
+          ShouldUpdateCC = true;
+          MI = &Inst;
+          break;
+        }
+      }
 
-    if (Instr.modifiesRegister(X86::EFLAGS, TRI) ||
-        Instr.readsRegister(X86::EFLAGS, TRI)) {
-      // This instruction modifies or uses EFLAGS.
+      // Try to use EFLAGS from an instruction with similar flag results.
+      // Example:
+      //     sub x, y  or  cmp x, y
+      //     ...           // EFLAGS not changed
+      //     cmp x, y      // <-- can be removed
+      if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
+                                             CmpValue, Inst)) {
+        Sub = &Inst;
+        break;
+      }
 
-      // MOV32r0 etc. are implemented with xor which clobbers condition code.
-      // They are safe to move up, if the definition to EFLAGS is dead and
-      // earlier instructions do not read or write EFLAGS.
-      if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
-          Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
-        Movr0Inst = &Instr;
+      // MOV32r0 is implemented with xor which clobbers condition code. It is
+      // safe to move up, if the definition to EFLAGS is dead and earlier
+      // instructions do not read or write EFLAGS.
+      if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
+          Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+        Movr0Inst = &Inst;
         continue;
       }
 
-      // We can't remove CmpInstr.
+      // Cannot do anything for any other EFLAG changes.
       return false;
     }
   }
 
   // Return false if no candidates exist.
-  if (!IsCmpZero && !Sub)
+  if (!MI && !Sub)
     return false;
 
   bool IsSwapped =
@@ -4374,9 +4365,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   // live-out.
   bool IsSafe = false;
   SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
-  MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
-  for (++I; I != E; ++I) {
-    const MachineInstr &Instr = *I;
+  MachineBasicBlock::iterator AfterCmpInstr =
+      std::next(MachineBasicBlock::iterator(CmpInstr));
+  for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
     bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
     bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
     // We should check the usage if this instruction uses and updates EFLAGS.
@@ -4449,7 +4440,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       // Push the MachineInstr to OpsToUpdate.
       // If it is safe to remove CmpInstr, the condition code of these
       // instructions will be modified.
-      OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC));
+      OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
     }
     if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
       // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
@@ -4461,8 +4452,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   // If EFLAGS is not killed nor re-defined, we should check whether it is
   // live-out. If it is live-out, do not optimize.
   if ((IsCmpZero || IsSwapped) && !IsSafe) {
-    MachineBasicBlock *MBB = CmpInstr.getParent();
-    for (MachineBasicBlock *Successor : MBB->successors())
+    for (MachineBasicBlock *Successor : CmpMBB.successors())
       if (Successor->isLiveIn(X86::EFLAGS))
         return false;
   }
@@ -4472,8 +4462,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   // Move Movr0Inst to the appropriate place before Sub.
   if (Movr0Inst) {
     // Look backwards until we find a def that doesn't use the current EFLAGS.
-    Def = Sub;
-    MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
+    MachineBasicBlock::reverse_iterator InsertI = Sub,
                                         InsertE = Sub->getParent()->rend();
     for (; InsertI != InsertE; ++InsertI) {
       MachineInstr *Instr = &*InsertI;

diff  --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir
new file mode 100644
index 000000000000..8a9c6504698b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/optimize-compare.mir
@@ -0,0 +1,103 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt | FileCheck %s
+
+---
+name: opt_zerocmp_0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_zerocmp_0
+    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+    ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[COPY]], i32 1234, implicit-def $eflags
+    ; CHECK-NEXT: $al = SETCCr 5, implicit $eflags
+    %0:gr32 = COPY $esi
+    %1:gr32 = XOR32ri %0, i32 1234, implicit-def $eflags
+    ; TEST should be removed.
+    TEST32rr %1, %1, implicit-def $eflags
+    $al = SETCCr 5, implicit $eflags
+...
+---
+name: opt_zerocmp_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_zerocmp_1
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64 = DEC64r [[COPY]], implicit-def $eflags
+    ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[DEC64r]], 5, $noreg, 12, $noreg
+    ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+    %0:gr64 = COPY $rsi
+    %1:gr64 = DEC64r %0, implicit-def $eflags
+    ; CMP should be removed.
+    CMP64ri8 %1, 0, implicit-def $eflags
+    %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+    $al = SETCCr 4, implicit $eflags
+...
+---
+name: opt_zerocmp_user_0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_zerocmp_user_0
+    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+    ; CHECK-NEXT: [[NEG32r:%[0-9]+]]:gr32 = NEG32r [[COPY]], implicit-def $eflags
+    ; CHECK-NEXT: $al = SETCCr 3, implicit $eflags
+    %0:gr32 = COPY $esi
+    %1:gr32 = NEG32r %0, implicit-def dead $eflags
+    ; TEST should be removed.
+    TEST32rr %0, %0, implicit-def $eflags
+    $al = SETCCr 4, implicit $eflags
+...
+---
+name: opt_redundant_flags_0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_0
+    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
+    ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags
+    ; CHECK-NEXT: $eax = COPY [[SUB32rr]]
+    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    %0:gr32 = COPY $esi
+    %1:gr32 = COPY $edi
+    %2:gr32 = SUB32rr %0, %1, implicit-def dead $eflags
+    $eax = COPY %2
+    ; CMP should be removed.
+    CMP32rr %0, %1, implicit-def $eflags
+    $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_1
+    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
+    ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags
+    ; CHECK-NEXT: $eax = COPY [[SUB32rr]]
+    ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+    %0:gr32 = COPY $esi
+    %1:gr32 = COPY $edi
+    %2:gr32 = SUB32rr %0, %1, implicit-def dead $eflags
+    $eax = COPY %2
+    ; CMP should be removed.
+    CMP32rr %1, %0, implicit-def $eflags
+    $bl = SETCCr 3, implicit $eflags
+...
+---
+name: opt_redundant_flags_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_2
+    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
+    ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags
+    ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
+    ; CHECK-NEXT: $eax = COPY [[SUB32rr]]
+    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    %0:gr32 = COPY $esi
+    %1:gr32 = COPY $edi
+    %2:gr32 = SUB32rr %0, %1, implicit-def $eflags
+    ; an extra eflags reader shouldn't stop optimization.
+    $cl = SETCCr 2, implicit $eflags
+    $eax = COPY %2
+    CMP32rr %0, %1, implicit-def $eflags
+    $bl = SETCCr 2, implicit $eflags
+...


        


More information about the llvm-commits mailing list