[llvm] 4b75d67 - X86InstrInfo: Look across basic blocks in optimizeCompareInstr

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 24 16:44:00 PDT 2021


Author: Matthias Braun
Date: 2021-10-24T16:22:45-07:00
New Revision: 4b75d674f899fe2fd22ffe2dee849dbdf015fd9c

URL: https://github.com/llvm/llvm-project/commit/4b75d674f899fe2fd22ffe2dee849dbdf015fd9c
DIFF: https://github.com/llvm/llvm-project/commit/4b75d674f899fe2fd22ffe2dee849dbdf015fd9c.diff

LOG: X86InstrInfo: Look across basic blocks in optimizeCompareInstr

This extends `optimizeCompareInstr` to continue the backwards search
when it reaches the beginning of a basic block. If there is a single
predecessor block, we can simply continue the search in that block and
mark the EFLAGS register as live-in.

Differential Revision: https://reviews.llvm.org/D110862
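
As a quick illustration, here is a trimmed-down variant of the
opt_multiple_blocks MIR test added below (the stack push/pop is omitted):
the search for the flag producer of the TEST64rr in bb.1 now continues
into the single predecessor bb.0 and finds the flag-setting INC64r.

    bb.0:
      %0:gr64 = COPY undef $rsi
      %1:gr64 = INC64r %0, implicit-def $eflags   ; sets EFLAGS while defining %1
      JMP_1 %bb.1

    bb.1:
      ; With this change the TEST below is removed and bb.1 gains
      ; "liveins: $eflags"; the SETCC then reads the flags set by INC64r.
      TEST64rr %1, %1, implicit-def $eflags
      $al = SETCCr 4, implicit $eflags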

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/test/CodeGen/X86/jump_sign.ll
    llvm/test/CodeGen/X86/optimize-compare.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cc2d425eadd9..9c1552ee9084 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4295,66 +4295,76 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
   MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
   assert(SrcRegDef && "Must have a definition (SSA)");
-  for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
-    // Try to use EFLAGS from the instruction defining %SrcReg. Example:
-    //     %eax = addl ...
-    //     ...                // EFLAGS not changed
-    //     testl %eax, %eax   // <-- can be removed
-    if (&Inst == SrcRegDef) {
-      if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
-        MI = &Inst;
-        break;
-      }
-      // Cannot find other candidates before definition of SrcReg.
-      return false;
-    }
-
-    if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
-      // Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
-      //      %eax = ...
-      //      ...
-      //      popcntl %eax
-      //      ...                 // EFLAGS not changed
-      //      testl %eax, %eax    // <-- can be removed
-      if (IsCmpZero) {
-        NewCC = isUseDefConvertible(Inst);
-        if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
-            Inst.getOperand(1).getReg() == SrcReg) {
-          ShouldUpdateCC = true;
+  for (MachineBasicBlock *MBB = &CmpMBB;;) {
+    for (MachineInstr &Inst : make_range(From, MBB->rend())) {
+      // Try to use EFLAGS from the instruction defining %SrcReg. Example:
+      //     %eax = addl ...
+      //     ...                // EFLAGS not changed
+      //     testl %eax, %eax   // <-- can be removed
+      if (&Inst == SrcRegDef) {
+        if (IsCmpZero &&
+            isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
           MI = &Inst;
           break;
         }
+        // Cannot find other candidates before definition of SrcReg.
+        return false;
       }
 
-      // Try to use EFLAGS from an instruction with similar flag results.
-      // Example:
-      //     sub x, y  or  cmp x, y
-      //     ...           // EFLAGS not changed
-      //     cmp x, y      // <-- can be removed
-      if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
-                                             CmpValue, Inst)) {
-        Sub = &Inst;
-        break;
-      }
+      if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
+        // Try to use EFLAGS produced by an instruction reading %SrcReg.
+        // Example:
+        //      %eax = ...
+        //      ...
+        //      popcntl %eax
+        //      ...                 // EFLAGS not changed
+        //      testl %eax, %eax    // <-- can be removed
+        if (IsCmpZero) {
+          NewCC = isUseDefConvertible(Inst);
+          if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
+              Inst.getOperand(1).getReg() == SrcReg) {
+            ShouldUpdateCC = true;
+            MI = &Inst;
+            break;
+          }
+        }
 
-      // MOV32r0 is implemented with xor which clobbers condition code. It is
-      // safe to move up, if the definition to EFLAGS is dead and earlier
-      // instructions do not read or write EFLAGS.
-      if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
-          Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
-        Movr0Inst = &Inst;
-        continue;
+        // Try to use EFLAGS from an instruction with similar flag results.
+        // Example:
+        //     sub x, y  or  cmp x, y
+        //     ...           // EFLAGS not changed
+        //     cmp x, y      // <-- can be removed
+        if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
+                                               CmpMask, CmpValue, Inst)) {
+          Sub = &Inst;
+          break;
+        }
+
+        // MOV32r0 is implemented with xor which clobbers condition code. It is
+        // safe to move up, if the definition to EFLAGS is dead and earlier
+        // instructions do not read or write EFLAGS.
+        if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
+            Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+          Movr0Inst = &Inst;
+          continue;
+        }
+
+        // Cannot do anything for any other EFLAG changes.
+        return false;
       }
+    }
 
-      // Cannot do anything for any other EFLAG changes.
+    if (MI || Sub)
+      break;
+
+    // Reached begin of basic block. Continue in predecessor if there is
+    // exactly one.
+    if (MBB->pred_size() != 1)
       return false;
-    }
+    MBB = *MBB->pred_begin();
+    From = MBB->rbegin();
   }
 
-  // Return false if no candidates exist.
-  if (!MI && !Sub)
-    return false;
-
   bool IsSwapped =
       (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
        Sub->getOperand(2).getReg() == SrcReg);
@@ -4459,8 +4469,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
 
   // The instruction to be updated is either Sub or MI.
   Sub = IsCmpZero ? MI : Sub;
+  MachineBasicBlock *SubBB = Sub->getParent();
   // Move Movr0Inst to the appropriate place before Sub.
   if (Movr0Inst) {
+    // Only move within the same block so we don't accidentally move to a
+    // block with higher execution frequency.
+    if (&CmpMBB != SubBB)
+      return false;
     // Look backwards until we find a def that doesn't use the current EFLAGS.
     MachineBasicBlock::reverse_iterator InsertI = Sub,
                                         InsertE = Sub->getParent()->rend();
@@ -4468,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       MachineInstr *Instr = &*InsertI;
       if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
           Instr->modifiesRegister(X86::EFLAGS, TRI)) {
-        Sub->getParent()->remove(Movr0Inst);
+        Movr0Inst->getParent()->remove(Movr0Inst);
         Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
                                    Movr0Inst);
         break;
@@ -4490,6 +4505,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
         .setImm(Op.second);
   }
+  // Add EFLAGS to block live-ins between CmpBB and block of flags producer.
+  for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
+       MBB = *MBB->pred_begin()) {
+    assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
+    if (!MBB->isLiveIn(X86::EFLAGS))
+      MBB->addLiveIn(X86::EFLAGS);
+  }
   return true;
 }
 

diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index b436b1f35c8f..848ebc97a1ac 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -130,20 +130,21 @@ define i32 @func_m(i32 %a, i32 %b) nounwind {
   ret i32 %cond
 }
 
-; If EFLAGS is live-out, we can't remove cmp if there exists
-; a swapped sub.
+; (This used to test that an unsafe removal of cmp in bb.0 is not happening,
+;  but now we can do so safely).
 define i32 @func_l2(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_l2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    subl %edx, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    subl %edx, %ecx
 ; CHECK-NEXT:    jne .LBB8_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
-; CHECK-NEXT:    cmpl %ecx, %edx
-; CHECK-NEXT:    cmovlel %ecx, %eax
+; CHECK-NEXT:    cmovll %ecx, %eax
+; CHECK-NEXT:    retl
 ; CHECK-NEXT:  .LBB8_2: # %if.else
+; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    retl
   %cmp = icmp eq i32 %b, %a
   %sub = sub nsw i32 %a, %b

diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir
index 8a9c6504698b..5b8ca4c43392 100644
--- a/llvm/test/CodeGen/X86/optimize-compare.mir
+++ b/llvm/test/CodeGen/X86/optimize-compare.mir
@@ -32,6 +32,108 @@ body: |
     $al = SETCCr 4, implicit $eflags
 ...
 ---
+name: opt_multiple_blocks
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: opt_multiple_blocks
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+  ; CHECK-NEXT:   [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT:   PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
+  ; CHECK-NEXT:   $rcx = POP64r implicit-def $rsp, implicit $rsp
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
+  ; CHECK-NEXT:   $al = SETCCr 4, implicit $eflags
+  bb.0:
+    %0:gr64 = COPY undef $rsi
+    %1:gr64 = INC64r %0, implicit-def $eflags
+    PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
+    $rcx = POP64r implicit-def $rsp, implicit $rsp
+    JMP_1 %bb.1
+
+  bb.1:
+    ; TEST should be removed.
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+    $al = SETCCr 4, implicit $eflags
+...
+---
+name: opt_multiple_blocks_noopt_0
+body: |
+  ; CHECK-LABEL: name: opt_multiple_blocks_noopt_0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+  ; CHECK-NEXT:   [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TEST64rr [[INC64r]], [[INC64r]], implicit-def $eflags
+  ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
+  ; CHECK-NEXT:   $al = SETCCr 4, implicit $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.1, 2, implicit $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+    %0:gr64 = COPY undef $rsi
+    %1:gr64 = INC64r %0, implicit-def $eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    ; The TEST64rr should not be removed, since there are multiple preds.
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+    $al = SETCCr 4, implicit $eflags
+    JCC_1 %bb.1, 2, implicit $eflags
+
+  bb.2:
+...
+---
+name: opt_multiple_blocks_noopt_1
+body: |
+  ; CHECK-LABEL: name: opt_multiple_blocks_noopt_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+  ; CHECK-NEXT:   TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.1, 2, implicit $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
+  ; CHECK-NEXT:   TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT:   $al = SETCCr 4, implicit $eflags
+  bb.0:
+    JMP_1 %bb.1
+
+  bb.1:
+    %0:gr64 = COPY undef $rsi
+    TEST64rr %0, %0, implicit-def $eflags
+    JCC_1 %bb.1, 2, implicit $eflags
+
+  bb.2:
+    ; We should not move MOV32r0 up into the loop (that would be correct but
+    ; slow).
+    %1:gr32 = MOV32r0 implicit-def $eflags
+    ; TEST should not be removed because of MOV32r0.
+    TEST64rr %0, %0, implicit-def $eflags
+    $al = SETCCr 4, implicit $eflags
+...
+---
 name: opt_zerocmp_user_0
 body: |
   bb.0:


        

