[PATCH] D94856: [X86] WIP Improve optimizeCompareInstr for signed comparisons after logical operations.

Craig Topper via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 15 19:47:04 PST 2021


craig.topper created this revision.
craig.topper added reviewers: RKSimon, pengfei, spatel.
Herald added a subscriber: hiraditya.
craig.topper requested review of this revision.
Herald added a project: LLVM.

We previously couldn't optimize out a TEST if the branch/setcc/cmov
used the overflow flag. This patches allows the TEST to be removed
if the flag producing instruction is known to clear the OF flag.
Thats what the TEST instruction would have done so that should be
equivalent.

Need to add test cases. I'll try to get back to this if I have bandwidth.

Fixes PR48768.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D94856

Files:
  llvm/lib/Target/X86/X86InstrInfo.cpp


Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3977,8 +3977,10 @@
 
 /// Check whether the definition can be converted
 /// to remove a comparison against zero.
-inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
+inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
+                                    bool &ClearsOverflowFlag) {
   NoSignFlag = false;
+  ClearsOverflowFlag = false;
 
   switch (MI.getOpcode()) {
   default: return false;
@@ -4013,6 +4015,21 @@
   case X86::ADD16rr:   case X86::ADD8rr:   case X86::ADD64rm:
   case X86::ADD32rm:   case X86::ADD16rm:  case X86::ADD8rm:
   case X86::INC64r:    case X86::INC32r:   case X86::INC16r: case X86::INC8r:
+  case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri:
+  case X86::ADC32ri8:  case X86::ADC16ri:  case X86::ADC16ri8:
+  case X86::ADC8ri:    case X86::ADC64rr:  case X86::ADC32rr:
+  case X86::ADC16rr:   case X86::ADC8rr:   case X86::ADC64rm:
+  case X86::ADC32rm:   case X86::ADC16rm:  case X86::ADC8rm:
+  case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri:
+  case X86::SBB32ri8:  case X86::SBB16ri:  case X86::SBB16ri8:
+  case X86::SBB8ri:    case X86::SBB64rr:  case X86::SBB32rr:
+  case X86::SBB16rr:   case X86::SBB8rr:   case X86::SBB64rm:
+  case X86::SBB32rm:   case X86::SBB16rm:  case X86::SBB8rm:
+  case X86::NEG8r:     case X86::NEG16r:   case X86::NEG32r: case X86::NEG64r:
+  case X86::SAR8r1:    case X86::SAR16r1:  case X86::SAR32r1:case X86::SAR64r1:
+  case X86::SHR8r1:    case X86::SHR16r1:  case X86::SHR32r1:case X86::SHR64r1:
+  case X86::SHL8r1:    case X86::SHL16r1:  case X86::SHL32r1:case X86::SHL64r1:
+     return true;
   case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
   case X86::AND32ri8:  case X86::AND16ri:  case X86::AND16ri8:
   case X86::AND8ri:    case X86::AND64rr:  case X86::AND32rr:
@@ -4028,20 +4045,6 @@
   case X86::OR8ri:     case X86::OR64rr:   case X86::OR32rr:
   case X86::OR16rr:    case X86::OR8rr:    case X86::OR64rm:
   case X86::OR32rm:    case X86::OR16rm:   case X86::OR8rm:
-  case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri:
-  case X86::ADC32ri8:  case X86::ADC16ri:  case X86::ADC16ri8:
-  case X86::ADC8ri:    case X86::ADC64rr:  case X86::ADC32rr:
-  case X86::ADC16rr:   case X86::ADC8rr:   case X86::ADC64rm:
-  case X86::ADC32rm:   case X86::ADC16rm:  case X86::ADC8rm:
-  case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri:
-  case X86::SBB32ri8:  case X86::SBB16ri:  case X86::SBB16ri8:
-  case X86::SBB8ri:    case X86::SBB64rr:  case X86::SBB32rr:
-  case X86::SBB16rr:   case X86::SBB8rr:   case X86::SBB64rm:
-  case X86::SBB32rm:   case X86::SBB16rm:  case X86::SBB8rm:
-  case X86::NEG8r:     case X86::NEG16r:   case X86::NEG32r: case X86::NEG64r:
-  case X86::SAR8r1:    case X86::SAR16r1:  case X86::SAR32r1:case X86::SAR64r1:
-  case X86::SHR8r1:    case X86::SHR16r1:  case X86::SHR32r1:case X86::SHR64r1:
-  case X86::SHL8r1:    case X86::SHL16r1:  case X86::SHL32r1:case X86::SHL64r1:
   case X86::ANDN32rr:  case X86::ANDN32rm:
   case X86::ANDN64rr:  case X86::ANDN64rm:
   case X86::BLSI32rr:  case X86::BLSI32rm:
@@ -4079,6 +4082,7 @@
   case X86::T1MSKC64rr:  case X86::T1MSKC64rm:
   case X86::TZMSK32rr:   case X86::TZMSK32rm:
   case X86::TZMSK64rr:   case X86::TZMSK64rm:
+    ClearsOverflowFlag = true;
     return true;
   case X86::BEXTR32rr:   case X86::BEXTR64rr:
   case X86::BEXTR32rm:   case X86::BEXTR64rm:
@@ -4204,8 +4208,9 @@
   // right way.
   bool ShouldUpdateCC = false;
   bool NoSignFlag = false;
+  bool ClearsOverflowFlag = false;
   X86::CondCode NewCC = X86::COND_INVALID;
-  if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
+  if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
     // Scan forward from the use until we hit the use we're looking for or the
     // compare instruction.
     for (MachineBasicBlock::iterator J = MI;; ++J) {
@@ -4317,11 +4322,15 @@
       default: break;
       case X86::COND_A: case X86::COND_AE:
       case X86::COND_B: case X86::COND_BE:
+        // CF is used, we can't perform this optimization.
+        return false;
       case X86::COND_G: case X86::COND_GE:
       case X86::COND_L: case X86::COND_LE:
       case X86::COND_O: case X86::COND_NO:
-        // CF and OF are used, we can't perform this optimization.
-        return false;
+        // If OF is used, the instruction needs to clear it like CmpZero does.
+        if (!ClearsOverflowFlag)
+          return false;
+        break;
       case X86::COND_S: case X86::COND_NS:
         // If SF is used, but the instruction doesn't update the SF, then we
         // can't do the optimization.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D94856.317145.patch
Type: text/x-patch
Size: 4873 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210116/f6282533/attachment.bin>


More information about the llvm-commits mailing list