[llvm] 4b75d67 - X86InstrInfo: Look across basic blocks in optimizeCompareInstr
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 24 16:44:00 PDT 2021
Author: Matthias Braun
Date: 2021-10-24T16:22:45-07:00
New Revision: 4b75d674f899fe2fd22ffe2dee849dbdf015fd9c
URL: https://github.com/llvm/llvm-project/commit/4b75d674f899fe2fd22ffe2dee849dbdf015fd9c
DIFF: https://github.com/llvm/llvm-project/commit/4b75d674f899fe2fd22ffe2dee849dbdf015fd9c.diff
LOG: X86InstrInfo: Look across basic blocks in optimizeCompareInstr
This extends `optimizeCompareInstr` to continue the backwards search
when it reaches the beginning of a basic block. If there is a single
predecessor block then we can just continue the search in that block and
mark the EFLAGS register as live-in.
Differential Revision: https://reviews.llvm.org/D110862
Added:
Modified:
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/test/CodeGen/X86/jump_sign.ll
llvm/test/CodeGen/X86/optimize-compare.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cc2d425eadd9..9c1552ee9084 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4295,66 +4295,76 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
assert(SrcRegDef && "Must have a definition (SSA)");
- for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
- // Try to use EFLAGS from the instruction defining %SrcReg. Example:
- // %eax = addl ...
- // ... // EFLAGS not changed
- // testl %eax, %eax // <-- can be removed
- if (&Inst == SrcRegDef) {
- if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
- MI = &Inst;
- break;
- }
- // Cannot find other candidates before definition of SrcReg.
- return false;
- }
-
- if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
- // Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
- // %eax = ...
- // ...
- // popcntl %eax
- // ... // EFLAGS not changed
- // testl %eax, %eax // <-- can be removed
- if (IsCmpZero) {
- NewCC = isUseDefConvertible(Inst);
- if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
- Inst.getOperand(1).getReg() == SrcReg) {
- ShouldUpdateCC = true;
+ for (MachineBasicBlock *MBB = &CmpMBB;;) {
+ for (MachineInstr &Inst : make_range(From, MBB->rend())) {
+ // Try to use EFLAGS from the instruction defining %SrcReg. Example:
+ // %eax = addl ...
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (&Inst == SrcRegDef) {
+ if (IsCmpZero &&
+ isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
MI = &Inst;
break;
}
+ // Cannot find other candidates before definition of SrcReg.
+ return false;
}
- // Try to use EFLAGS from an instruction with similar flag results.
- // Example:
- // sub x, y or cmp x, y
- // ... // EFLAGS not changed
- // cmp x, y // <-- can be removed
- if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
- CmpValue, Inst)) {
- Sub = &Inst;
- break;
- }
+ if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
+ // Try to use EFLAGS produced by an instruction reading %SrcReg.
+ // Example:
+ // %eax = ...
+ // ...
+ // popcntl %eax
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (IsCmpZero) {
+ NewCC = isUseDefConvertible(Inst);
+ if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(1).getReg() == SrcReg) {
+ ShouldUpdateCC = true;
+ MI = &Inst;
+ break;
+ }
+ }
- // MOV32r0 is implemented with xor which clobbers condition code. It is
- // safe to move up, if the definition to EFLAGS is dead and earlier
- // instructions do not read or write EFLAGS.
- if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
- Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
- Movr0Inst = &Inst;
- continue;
+ // Try to use EFLAGS from an instruction with similar flag results.
+ // Example:
+ // sub x, y or cmp x, y
+ // ... // EFLAGS not changed
+ // cmp x, y // <-- can be removed
+ if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
+ CmpMask, CmpValue, Inst)) {
+ Sub = &Inst;
+ break;
+ }
+
+ // MOV32r0 is implemented with xor which clobbers condition code. It is
+ // safe to move up, if the definition to EFLAGS is dead and earlier
+ // instructions do not read or write EFLAGS.
+ if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
+ Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+ Movr0Inst = &Inst;
+ continue;
+ }
+
+ // Cannot do anything for any other EFLAG changes.
+ return false;
}
+ }
- // Cannot do anything for any other EFLAG changes.
+ if (MI || Sub)
+ break;
+
+ // Reached the beginning of the basic block. Continue the search in the
+ // predecessor block if there is exactly one.
+ if (MBB->pred_size() != 1)
return false;
- }
+ MBB = *MBB->pred_begin();
+ From = MBB->rbegin();
}
- // Return false if no candidates exist.
- if (!MI && !Sub)
- return false;
-
bool IsSwapped =
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
Sub->getOperand(2).getReg() == SrcReg);
@@ -4459,8 +4469,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// The instruction to be updated is either Sub or MI.
Sub = IsCmpZero ? MI : Sub;
+ MachineBasicBlock *SubBB = Sub->getParent();
// Move Movr0Inst to the appropriate place before Sub.
if (Movr0Inst) {
+ // Only move within the same block so we don't accidentally move to a
+ // block with higher execution frequency.
+ if (&CmpMBB != SubBB)
+ return false;
// Look backwards until we find a def that doesn't use the current EFLAGS.
MachineBasicBlock::reverse_iterator InsertI = Sub,
InsertE = Sub->getParent()->rend();
@@ -4468,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
MachineInstr *Instr = &*InsertI;
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
Instr->modifiesRegister(X86::EFLAGS, TRI)) {
- Sub->getParent()->remove(Movr0Inst);
+ Movr0Inst->getParent()->remove(Movr0Inst);
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
Movr0Inst);
break;
@@ -4490,6 +4505,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
.setImm(Op.second);
}
+ // Add EFLAGS to the live-ins of each block between CmpMBB and the block of
+ // the flags producer.
+ for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
+ MBB = *MBB->pred_begin()) {
+ assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
+ if (!MBB->isLiveIn(X86::EFLAGS))
+ MBB->addLiveIn(X86::EFLAGS);
+ }
return true;
}
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index b436b1f35c8f..848ebc97a1ac 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -130,20 +130,21 @@ define i32 @func_m(i32 %a, i32 %b) nounwind {
ret i32 %cond
}
-; If EFLAGS is live-out, we can't remove cmp if there exists
-; a swapped sub.
+; (This used to test that an unsafe removal of the cmp in bb.0 did not
+; happen; the optimization can now remove it safely.)
define i32 @func_l2(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: func_l2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: subl %edx, %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: subl %edx, %ecx
; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %if.then
-; CHECK-NEXT: cmpl %ecx, %edx
-; CHECK-NEXT: cmovlel %ecx, %eax
+; CHECK-NEXT: cmovll %ecx, %eax
+; CHECK-NEXT: retl
; CHECK-NEXT: .LBB8_2: # %if.else
+; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retl
%cmp = icmp eq i32 %b, %a
%sub = sub nsw i32 %a, %b
diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir
index 8a9c6504698b..5b8ca4c43392 100644
--- a/llvm/test/CodeGen/X86/optimize-compare.mir
+++ b/llvm/test/CodeGen/X86/optimize-compare.mir
@@ -32,6 +32,108 @@ body: |
$al = SETCCr 4, implicit $eflags
...
---
+name: opt_multiple_blocks
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: opt_multiple_blocks
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+ ; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
+ ; CHECK-NEXT: $rcx = POP64r implicit-def $rsp, implicit $rsp
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
+ ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+ bb.0:
+ %0:gr64 = COPY undef $rsi
+ %1:gr64 = INC64r %0, implicit-def $eflags
+ PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
+ $rcx = POP64r implicit-def $rsp, implicit $rsp
+ JMP_1 %bb.1
+
+ bb.1:
+ ; TEST should be removed.
+ TEST64rr %1, %1, implicit-def $eflags
+ %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+ $al = SETCCr 4, implicit $eflags
+...
+---
+name: opt_multiple_blocks_noopt_0
+body: |
+ ; CHECK-LABEL: name: opt_multiple_blocks_noopt_0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+ ; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: TEST64rr [[INC64r]], [[INC64r]], implicit-def $eflags
+ ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
+ ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ %0:gr64 = COPY undef $rsi
+ %1:gr64 = INC64r %0, implicit-def $eflags
+ JMP_1 %bb.1
+
+ bb.1:
+ ; The TEST64rr should not be removed, since there are multiple preds.
+ TEST64rr %1, %1, implicit-def $eflags
+ %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+ $al = SETCCr 4, implicit $eflags
+ JCC_1 %bb.1, 2, implicit $eflags
+
+ bb.2:
+...
+---
+name: opt_multiple_blocks_noopt_1
+body: |
+ ; CHECK-LABEL: name: opt_multiple_blocks_noopt_1
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+ ; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
+ ; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+ bb.0:
+ JMP_1 %bb.1
+
+ bb.1:
+ %0:gr64 = COPY undef $rsi
+ TEST64rr %0, %0, implicit-def $eflags
+ JCC_1 %bb.1, 2, implicit $eflags
+
+ bb.2:
+ ; We should not move MOV32r0 up into the loop (that would be correct but
+ ; slow).
+ %1:gr32 = MOV32r0 implicit-def $eflags
+ ; TEST should not be removed because of MOV32r0.
+ TEST64rr %0, %0, implicit-def $eflags
+ $al = SETCCr 4, implicit $eflags
+...
+---
name: opt_zerocmp_user_0
body: |
bb.0:
More information about the llvm-commits
mailing list