[llvm] 683994c - X86InstrInfo: Refactor and cleanup optimizeCompareInstr
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 24 16:43:58 PDT 2021
Author: Matthias Braun
Date: 2021-10-24T16:22:45-07:00
New Revision: 683994c863b8b8a46372408773e4bd65502c5193
URL: https://github.com/llvm/llvm-project/commit/683994c863b8b8a46372408773e4bd65502c5193
DIFF: https://github.com/llvm/llvm-project/commit/683994c863b8b8a46372408773e4bd65502c5193.diff
LOG: X86InstrInfo: Refactor and cleanup optimizeCompareInstr
This changes the first part of `optimizeCompareInstr` being split into a
loop with a forward scan for cases that re-use zero flags from a
producer in case of compare with zero and a backward scan for finding an
instruction equivalent to a compare.
The code now uses a single backward scan searching for the next
instructions that reads or writes EFLAGS.
Also:
- Add comments giving examples for the 3 cases handled.
- Check `MI` which contains the result of the zero-compare cases,
instead of re-checking `IsCmpZero`.
- Tweak coding style in some loops.
- Add new MIR based tests that test the optimization in isolation.
This also removes a check for flag readers in situations like this:
```
= SUB32rr %0, %1, implicit-def $eflags
... we no longer stop when there are $eflag users here
CMP32rr %0, %1 ; will be removed
...
```
Differential Revision: https://reviews.llvm.org/D110857
Added:
llvm/test/CodeGen/X86/optimize-compare.mir
Modified:
llvm/lib/Target/X86/X86InstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 3526955f2814..cc2d425eadd9 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4275,93 +4275,84 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
}
}
- // Get the unique definition of SrcReg.
- MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
- if (!MI) return false;
+ // The following code tries to remove the comparison by re-using EFLAGS
+ // from earlier instructions.
- // CmpInstr is the first instruction of the BB.
- MachineBasicBlock::iterator I = CmpInstr, Def = MI;
-
- // If we are comparing against zero, check whether we can use MI to update
- // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
- if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
- return false;
- // If we have a use of the source register between the def and our compare
- // instruction we can eliminate the compare iff the use sets EFLAGS in the
- // right way.
- bool ShouldUpdateCC = false;
+ MachineInstr *MI = nullptr;
+ MachineInstr *Sub = nullptr;
+ MachineInstr *Movr0Inst = nullptr;
bool NoSignFlag = false;
bool ClearsOverflowFlag = false;
+ bool ShouldUpdateCC = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
- // Scan forward from the use until we hit the use we're looking for or the
- // compare instruction.
- for (MachineBasicBlock::iterator J = MI;; ++J) {
- // Do we have a convertible instruction?
- NewCC = isUseDefConvertible(*J);
- if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
- J->getOperand(1).getReg() == SrcReg) {
- assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
- ShouldUpdateCC = true; // Update CC later on.
- // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
- // with the new def.
- Def = J;
- MI = &*Def;
+
+ // Search backward from CmpInstr for the next instruction defining EFLAGS.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
+ MachineBasicBlock::reverse_iterator From =
+ std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
+ MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+ assert(SrcRegDef && "Must have a definition (SSA)");
+ for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
+ // Try to use EFLAGS from the instruction defining %SrcReg. Example:
+ // %eax = addl ...
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (&Inst == SrcRegDef) {
+ if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
+ MI = &Inst;
break;
}
-
- if (J == I)
- return false;
+ // Cannot find other candidates before definition of SrcReg.
+ return false;
}
- }
- // We are searching for an earlier instruction that can make CmpInstr
- // redundant and that instruction will be saved in Sub.
- MachineInstr *Sub = nullptr;
- const TargetRegisterInfo *TRI = &getRegisterInfo();
-
- // We iterate backward, starting from the instruction before CmpInstr and
- // stop when reaching the definition of a source register or done with the BB.
- // RI points to the instruction before CmpInstr.
- // If the definition is in this basic block, RE points to the definition;
- // otherwise, RE is the rend of the basic block.
- MachineBasicBlock::reverse_iterator
- RI = ++I.getReverse(),
- RE = CmpInstr.getParent() == MI->getParent()
- ? Def.getReverse() /* points to MI */
- : CmpInstr.getParent()->rend();
- MachineInstr *Movr0Inst = nullptr;
- for (; RI != RE; ++RI) {
- MachineInstr &Instr = *RI;
- // Check whether CmpInstr can be made redundant by the current instruction.
- if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
- CmpValue, Instr)) {
- Sub = &Instr;
- break;
- }
+ if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
+ // Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
+ // %eax = ...
+ // ...
+ // popcntl %eax
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (IsCmpZero) {
+ NewCC = isUseDefConvertible(Inst);
+ if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(1).getReg() == SrcReg) {
+ ShouldUpdateCC = true;
+ MI = &Inst;
+ break;
+ }
+ }
- if (Instr.modifiesRegister(X86::EFLAGS, TRI) ||
- Instr.readsRegister(X86::EFLAGS, TRI)) {
- // This instruction modifies or uses EFLAGS.
+ // Try to use EFLAGS from an instruction with similar flag results.
+ // Example:
+ // sub x, y or cmp x, y
+ // ... // EFLAGS not changed
+ // cmp x, y // <-- can be removed
+ if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
+ CmpValue, Inst)) {
+ Sub = &Inst;
+ break;
+ }
- // MOV32r0 etc. are implemented with xor which clobbers condition code.
- // They are safe to move up, if the definition to EFLAGS is dead and
- // earlier instructions do not read or write EFLAGS.
- if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
- Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
- Movr0Inst = &Instr;
+ // MOV32r0 is implemented with xor which clobbers condition code. It is
+ // safe to move up, if the definition to EFLAGS is dead and earlier
+ // instructions do not read or write EFLAGS.
+ if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
+ Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+ Movr0Inst = &Inst;
continue;
}
- // We can't remove CmpInstr.
+ // Cannot do anything for any other EFLAG changes.
return false;
}
}
// Return false if no candidates exist.
- if (!IsCmpZero && !Sub)
+ if (!MI && !Sub)
return false;
bool IsSwapped =
@@ -4374,9 +4365,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// live-out.
bool IsSafe = false;
SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
- MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
- for (++I; I != E; ++I) {
- const MachineInstr &Instr = *I;
+ MachineBasicBlock::iterator AfterCmpInstr =
+ std::next(MachineBasicBlock::iterator(CmpInstr));
+ for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
// We should check the usage if this instruction uses and updates EFLAGS.
@@ -4449,7 +4440,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// Push the MachineInstr to OpsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// instructions will be modified.
- OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC));
+ OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
}
if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
// It is safe to remove CmpInstr if EFLAGS is updated again or killed.
@@ -4461,8 +4452,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// If EFLAGS is not killed nor re-defined, we should check whether it is
// live-out. If it is live-out, do not optimize.
if ((IsCmpZero || IsSwapped) && !IsSafe) {
- MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock *Successor : MBB->successors())
+ for (MachineBasicBlock *Successor : CmpMBB.successors())
if (Successor->isLiveIn(X86::EFLAGS))
return false;
}
@@ -4472,8 +4462,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// Move Movr0Inst to the appropriate place before Sub.
if (Movr0Inst) {
// Look backwards until we find a def that doesn't use the current EFLAGS.
- Def = Sub;
- MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
+ MachineBasicBlock::reverse_iterator InsertI = Sub,
InsertE = Sub->getParent()->rend();
for (; InsertI != InsertE; ++InsertI) {
MachineInstr *Instr = &*InsertI;
diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir
new file mode 100644
index 000000000000..8a9c6504698b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/optimize-compare.mir
@@ -0,0 +1,103 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt | FileCheck %s
+
+---
+name: opt_zerocmp_0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_zerocmp_0
+ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[COPY]], i32 1234, implicit-def $eflags
+ ; CHECK-NEXT: $al = SETCCr 5, implicit $eflags
+ %0:gr32 = COPY $esi
+ %1:gr32 = XOR32ri %0, i32 1234, implicit-def $eflags
+ ; TEST should be removed.
+ TEST32rr %1, %1, implicit-def $eflags
+ $al = SETCCr 5, implicit $eflags
+...
+---
+name: opt_zerocmp_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_zerocmp_1
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64 = DEC64r [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[DEC64r]], 5, $noreg, 12, $noreg
+ ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+ %0:gr64 = COPY $rsi
+ %1:gr64 = DEC64r %0, implicit-def $eflags
+ ; CMP should be removed.
+ CMP64ri8 %1, 0, implicit-def $eflags
+ %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+ $al = SETCCr 4, implicit $eflags
+...
+---
+name: opt_zerocmp_user_0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_zerocmp_user_0
+ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[NEG32r:%[0-9]+]]:gr32 = NEG32r [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: $al = SETCCr 3, implicit $eflags
+ %0:gr32 = COPY $esi
+ %1:gr32 = NEG32r %0, implicit-def dead $eflags
+ ; TEST should be removed.
+ TEST32rr %0, %0, implicit-def $eflags
+ $al = SETCCr 4, implicit $eflags
+...
+---
+name: opt_redundant_flags_0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_0
+ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags
+ ; CHECK-NEXT: $eax = COPY [[SUB32rr]]
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr32 = COPY $esi
+ %1:gr32 = COPY $edi
+ %2:gr32 = SUB32rr %0, %1, implicit-def dead $eflags
+ $eax = COPY %2
+ ; CMP should be removed.
+ CMP32rr %0, %1, implicit-def $eflags
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_1
+ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags
+ ; CHECK-NEXT: $eax = COPY [[SUB32rr]]
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr32 = COPY $esi
+ %1:gr32 = COPY $edi
+ %2:gr32 = SUB32rr %0, %1, implicit-def dead $eflags
+ $eax = COPY %2
+ ; CMP should be removed.
+ CMP32rr %1, %0, implicit-def $eflags
+ $bl = SETCCr 3, implicit $eflags
+...
+---
+name: opt_redundant_flags_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_2
+ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags
+ ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
+ ; CHECK-NEXT: $eax = COPY [[SUB32rr]]
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr32 = COPY $esi
+ %1:gr32 = COPY $edi
+ %2:gr32 = SUB32rr %0, %1, implicit-def $eflags
+ ; an extra eflags reader shouldn't stop optimization.
+ $cl = SETCCr 2, implicit $eflags
+ $eax = COPY %2
+ CMP32rr %0, %1, implicit-def $eflags
+ $bl = SETCCr 2, implicit $eflags
+...
More information about the llvm-commits
mailing list