[llvm] 5609c8b - [X86FixupLEAs] Try again to transform the sequence LEA/SUB to SUB/SUB
Guozhi Wei via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 16 10:19:04 PDT 2021
Author: Guozhi Wei
Date: 2021-07-16T10:16:03-07:00
New Revision: 5609c8b60730519eeb7bc95b61d2c09879dff44b
URL: https://github.com/llvm/llvm-project/commit/5609c8b60730519eeb7bc95b61d2c09879dff44b
DIFF: https://github.com/llvm/llvm-project/commit/5609c8b60730519eeb7bc95b61d2c09879dff44b.diff
LOG: [X86FixupLEAs] Try again to transform the sequence LEA/SUB to SUB/SUB
This patch transforms the sequence
lea (reg1, reg2), reg3
sub reg3, reg4
to two sub instructions
sub reg1, reg4
sub reg2, reg4
A similar optimization can also be applied to the LEA/ADD sequence.
The modifications to TwoAddressInstructionPass ensure that the operands of the ADD
instruction have the expected order (the dest register of the LEA should be a src register
of the ADD).
Differential Revision: https://reviews.llvm.org/D104684
Added:
Modified:
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
llvm/lib/Target/X86/X86FixupLEAs.cpp
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InstrInfo.h
llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
llvm/test/CodeGen/X86/lea-opt2.ll
llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 5c45cea5ccfe7..05d0591f1e5da 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -465,6 +465,13 @@ class TargetInstrInfo : public MCInstrInfo {
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const;
+ /// Returns true if the target has a preference on the operands order of
+ /// the given machine instruction. And specify if \p Commute is required to
+ /// get the desired operands order.
+ virtual bool hasCommutePreference(MachineInstr &MI, bool &Commute) const {
+ return false;
+ }
+
/// A pair composed of a register and a sub-register index.
/// Used to give some type checking when modeling Reg:SubReg.
struct RegSubRegPair {
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index bd20f32ee2534..1664b4dadfec9 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -527,6 +527,11 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge))
return false;
+ // Look for other target specific commute preference.
+ bool Commute;
+ if (TII->hasCommutePreference(*MI, Commute))
+ return Commute;
+
// Since there are no intervening uses for both registers, then commute
// if the def of RegC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 0054d5818a964..05cab776e0b76 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -79,6 +79,30 @@ class FixupLEAPass : public MachineFunctionPass {
MachineBasicBlock &MBB, bool OptIncDec,
bool UseLEAForSP) const;
+ /// Look for and transform the sequence
+ /// lea (reg1, reg2), reg3
+ /// sub reg3, reg4
+ /// to
+ /// sub reg1, reg4
+ /// sub reg2, reg4
+ /// It can also optimize the sequence lea/add similarly.
+ bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
+
+ /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
+ /// the dest register of LEA instruction I.
+ MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
+ MachineBasicBlock &MBB) const;
+
+ /// Check instructions between LeaI and AluI (exclusively).
+ /// Set BaseIndexDef to true if base or index register from LeaI is defined.
+ /// Set AluDestRef to true if the dest register of AluI is used or defined.
+ /// *KilledBase is set to the killed base register usage.
+ /// *KilledIndex is set to the killed index register usage.
+ void checkRegUsage(MachineBasicBlock::iterator &LeaI,
+ MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
+ bool &AluDestRef, MachineOperand **KilledBase,
+ MachineOperand **KilledIndex) const;
+
/// Determine if an instruction references a machine register
/// and, if so, whether it reads or writes the register.
RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
@@ -338,6 +362,18 @@ static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
}
}
+static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
+ switch (LEAOpcode) {
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
+ case X86::LEA32r:
+ case X86::LEA64_32r:
+ return X86::SUB32rr;
+ case X86::LEA64r:
+ return X86::SUB64rr;
+ }
+}
+
static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
const MachineOperand &Offset) {
bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
@@ -364,6 +400,162 @@ static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
}
}
+MachineBasicBlock::iterator
+FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
+ MachineBasicBlock &MBB) const {
+ const int InstrDistanceThreshold = 5;
+ int InstrDistance = 1;
+ MachineBasicBlock::iterator CurInst = std::next(I);
+
+ unsigned LEAOpcode = I->getOpcode();
+ unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
+ unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
+ Register DestReg = I->getOperand(0).getReg();
+
+ while (CurInst != MBB.end()) {
+ if (CurInst->isCall() || CurInst->isInlineAsm())
+ break;
+ if (InstrDistance > InstrDistanceThreshold)
+ break;
+
+ // Check if the lea dest register is used in an add/sub instruction only.
+ for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
+ MachineOperand &Opnd = CurInst->getOperand(I);
+ if (Opnd.isReg()) {
+ if (Opnd.getReg() == DestReg) {
+ if (Opnd.isDef() || !Opnd.isKill())
+ return MachineBasicBlock::iterator();
+
+ unsigned AluOpcode = CurInst->getOpcode();
+ if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
+ return MachineBasicBlock::iterator();
+
+ MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
+ MachineOperand AluDest = CurInst->getOperand(0);
+ if (Opnd2.getReg() != AluDest.getReg())
+ return MachineBasicBlock::iterator();
+
+ // X - (Y + Z) may generate different flags than (X - Y) - Z when
+ // there is overflow. So we can't change the alu instruction if the
+ // flags register is live.
+ if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
+ return MachineBasicBlock::iterator();
+
+ return CurInst;
+ }
+ if (TRI->regsOverlap(DestReg, Opnd.getReg()))
+ return MachineBasicBlock::iterator();
+ }
+ }
+
+ InstrDistance++;
+ ++CurInst;
+ }
+ return MachineBasicBlock::iterator();
+}
+
+void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
+ MachineBasicBlock::iterator &AluI,
+ bool &BaseIndexDef, bool &AluDestRef,
+ MachineOperand **KilledBase,
+ MachineOperand **KilledIndex) const {
+ BaseIndexDef = AluDestRef = false;
+ *KilledBase = *KilledIndex = nullptr;
+ Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
+ Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
+ Register AluDestReg = AluI->getOperand(0).getReg();
+
+ MachineBasicBlock::iterator CurInst = std::next(LeaI);
+ while (CurInst != AluI) {
+ for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
+ MachineOperand &Opnd = CurInst->getOperand(I);
+ if (!Opnd.isReg())
+ continue;
+ Register Reg = Opnd.getReg();
+ if (TRI->regsOverlap(Reg, AluDestReg))
+ AluDestRef = true;
+ if (TRI->regsOverlap(Reg, BaseReg)) {
+ if (Opnd.isDef())
+ BaseIndexDef = true;
+ else if (Opnd.isKill())
+ *KilledBase = &Opnd;
+ }
+ if (TRI->regsOverlap(Reg, IndexReg)) {
+ if (Opnd.isDef())
+ BaseIndexDef = true;
+ else if (Opnd.isKill())
+ *KilledIndex = &Opnd;
+ }
+ }
+ ++CurInst;
+ }
+}
+
+bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
+ MachineBasicBlock &MBB) const {
+ // Look for an add/sub instruction which uses the result of lea.
+ MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
+ if (AluI == MachineBasicBlock::iterator())
+ return false;
+
+ // Check if there are any related register usage between lea and alu.
+ bool BaseIndexDef, AluDestRef;
+ MachineOperand *KilledBase, *KilledIndex;
+ checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
+
+ MachineBasicBlock::iterator InsertPos = AluI;
+ if (BaseIndexDef) {
+ if (AluDestRef)
+ return false;
+ InsertPos = I;
+ KilledBase = KilledIndex = nullptr;
+ }
+
+ // Check if there are same registers.
+ Register AluDestReg = AluI->getOperand(0).getReg();
+ Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
+ Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
+ if (I->getOpcode() == X86::LEA64_32r) {
+ BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
+ IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
+ }
+ if (AluDestReg == IndexReg) {
+ if (BaseReg == IndexReg)
+ return false;
+ std::swap(BaseReg, IndexReg);
+ std::swap(KilledBase, KilledIndex);
+ }
+ if (BaseReg == IndexReg)
+ KilledBase = nullptr;
+
+ // Now it's safe to change instructions.
+ MachineInstr *NewMI1, *NewMI2;
+ unsigned NewOpcode = AluI->getOpcode();
+ NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
+ AluDestReg)
+ .addReg(AluDestReg, RegState::Kill)
+ .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
+ NewMI1->addRegisterDead(X86::EFLAGS, TRI);
+ NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
+ AluDestReg)
+ .addReg(AluDestReg, RegState::Kill)
+ .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
+ NewMI2->addRegisterDead(X86::EFLAGS, TRI);
+
+ // Clear the old Kill flags.
+ if (KilledBase)
+ KilledBase->setIsKill(false);
+ if (KilledIndex)
+ KilledIndex->setIsKill(false);
+
+ MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
+ MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
+ MBB.erase(I);
+ MBB.erase(AluI);
+ I = NewMI1;
+ return true;
+}
+
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB, bool OptIncDec,
bool UseLEAForSP) const {
@@ -398,6 +590,7 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
MachineInstr *NewMI = nullptr;
+ // Case 1.
// Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
// which can be turned into add %reg2, %reg1
if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
@@ -417,6 +610,7 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
.addReg(BaseReg).addReg(IndexReg);
}
} else if (DestReg == BaseReg && IndexReg == 0) {
+ // Case 2.
// This is an LEA with only a base register and a displacement,
// We can use ADDri or INC/DEC.
@@ -447,6 +641,12 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
.addReg(BaseReg).addImm(Disp.getImm());
}
}
+ } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
+ // Case 3.
+ // Look for and transform the sequence
+ // lea (reg1, reg2), reg3
+ // sub reg3, reg4
+ return optLEAALU(I, MBB);
} else
return false;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a5a687f1e7b9a..370532129f073 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2670,6 +2670,58 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
}
+static bool isConvertibleLEA(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
+ Opcode != X86::LEA64_32r)
+ return false;
+
+ const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
+
+ if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
+ Scale.getImm() > 1)
+ return false;
+
+ return true;
+}
+
+bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
+ // Currently we're interested in following sequence only.
+ // r3 = lea r1, r2
+ // r5 = add r3, r4
+ // Both r3 and r4 are killed in add, we hope the add instruction has the
+ // operand order
+ // r5 = add r4, r3
+ // So later in X86FixupLEAs the lea instruction can be rewritten as add.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
+ return false;
+
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ Register Reg1 = MI.getOperand(1).getReg();
+ Register Reg2 = MI.getOperand(2).getReg();
+
+ // Check if Reg1 comes from LEA in the same MBB.
+ if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) {
+ if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
+ Commute = true;
+ return true;
+ }
+ }
+
+ // Check if Reg2 comes from LEA in the same MBB.
+ if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) {
+ if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
+ Commute = false;
+ return true;
+ }
+ }
+
+ return false;
+}
+
X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 3cf6a7c15ec8f..c663bb32af375 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -284,6 +284,10 @@ class X86InstrInfo final : public X86GenInstrInfo {
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ /// Returns true if we have preference on the operands order in MI, the
+ /// commute decision is returned in Commute.
+ bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override;
+
/// Returns an adjusted FMA opcode that must be used in FMA instruction that
/// performs the same computations as the given \p MI but which has the
/// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index e4b7a8b7a548c..beac717343210 100644
--- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -29,9 +29,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rsi, %rbx
-; CHECK-NEXT: leaq (%r9,%r10), %rsi
-; CHECK-NEXT: leaq (%rsi,%r8), %rdx
-; CHECK-NEXT: addq %rsi, %rdx
+; CHECK-NEXT: leaq (%r9,%r10), %rdx
+; CHECK-NEXT: addq %rdx, %rdx
+; CHECK-NEXT: addq %r8, %rdx
; CHECK-NEXT: movq X(%rip), %rdi
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: addq %r8, %rdx
@@ -41,9 +41,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %r11, %rdi
; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: leaq (%r10,%r8), %rbx
-; CHECK-NEXT: leaq (%rdx,%rbx), %rsi
-; CHECK-NEXT: addq %rbx, %rsi
+; CHECK-NEXT: leaq (%r10,%r8), %rsi
+; CHECK-NEXT: addq %rsi, %rsi
+; CHECK-NEXT: addq %rdx, %rsi
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r12, %rdi
; CHECK-NEXT: addq %rdi, %r9
@@ -54,9 +54,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r9, %rdi
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: addq %rdi, %rbx
-; CHECK-NEXT: leaq (%rdx,%r8), %rax
-; CHECK-NEXT: leaq (%rsi,%rax), %rdi
-; CHECK-NEXT: addq %rax, %rdi
+; CHECK-NEXT: leaq (%rdx,%r8), %rdi
+; CHECK-NEXT: addq %rdi, %rdi
+; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rbx, %r10
@@ -67,9 +67,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %r15, %rcx
; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: leaq (%rsi,%rdx), %rbx
-; CHECK-NEXT: leaq (%rdi,%rbx), %r11
-; CHECK-NEXT: addq %rbx, %r11
+; CHECK-NEXT: leaq (%rsi,%rdx), %r11
+; CHECK-NEXT: addq %r11, %r11
+; CHECK-NEXT: addq %rdi, %r11
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r10, %rcx
; CHECK-NEXT: addq %rcx, %r8
@@ -80,9 +80,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: addq %r12, %rbx
; CHECK-NEXT: addq %rcx, %rbx
-; CHECK-NEXT: leaq (%rdi,%rsi), %rax
-; CHECK-NEXT: leaq (%r11,%rax), %r14
-; CHECK-NEXT: addq %rax, %r14
+; CHECK-NEXT: leaq (%rdi,%rsi), %r14
+; CHECK-NEXT: addq %r14, %r14
+; CHECK-NEXT: addq %r11, %r14
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rbx, %rdx
@@ -93,9 +93,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: addq %rbx, %rax
-; CHECK-NEXT: leaq (%r11,%rdi), %rbx
-; CHECK-NEXT: leaq (%r14,%rbx), %r9
-; CHECK-NEXT: addq %rbx, %r9
+; CHECK-NEXT: leaq (%r11,%rdi), %r9
+; CHECK-NEXT: addq %r9, %r9
+; CHECK-NEXT: addq %r14, %r9
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %rax, %rsi
@@ -106,9 +106,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: addq %r10, %rbx
; CHECK-NEXT: addq %rax, %rbx
-; CHECK-NEXT: leaq (%r14,%r11), %rax
-; CHECK-NEXT: leaq (%r9,%rax), %r10
-; CHECK-NEXT: addq %rax, %r10
+; CHECK-NEXT: leaq (%r14,%r11), %r10
+; CHECK-NEXT: addq %r10, %r10
+; CHECK-NEXT: addq %r9, %r10
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: addq %rbx, %rdi
@@ -119,9 +119,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %rbx, %rax
-; CHECK-NEXT: leaq (%r9,%r14), %rbx
-; CHECK-NEXT: leaq (%r10,%rbx), %r8
-; CHECK-NEXT: addq %rbx, %r8
+; CHECK-NEXT: leaq (%r9,%r14), %r8
+; CHECK-NEXT: addq %r8, %r8
+; CHECK-NEXT: addq %r10, %r8
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: addq %rax, %r11
@@ -132,9 +132,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r11, %rax
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rax, %rbx
-; CHECK-NEXT: leaq (%r10,%r9), %rax
-; CHECK-NEXT: leaq (%r8,%rax), %r15
-; CHECK-NEXT: addq %rax, %r15
+; CHECK-NEXT: leaq (%r10,%r9), %r15
+; CHECK-NEXT: addq %r15, %r15
+; CHECK-NEXT: addq %r8, %r15
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: addq %r11, %rbx
; CHECK-NEXT: addq %rbx, %r14
@@ -145,9 +145,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: addq %rbx, %rax
-; CHECK-NEXT: leaq (%r8,%r10), %rbx
-; CHECK-NEXT: leaq (%r15,%rbx), %rsi
-; CHECK-NEXT: addq %rbx, %rsi
+; CHECK-NEXT: leaq (%r8,%r10), %rsi
+; CHECK-NEXT: addq %rsi, %rsi
+; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r14, %rax
; CHECK-NEXT: addq %rax, %r9
@@ -158,9 +158,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: addq %rax, %rbx
-; CHECK-NEXT: leaq (%r15,%r8), %rax
-; CHECK-NEXT: leaq (%rsi,%rax), %r12
-; CHECK-NEXT: addq %rax, %r12
+; CHECK-NEXT: leaq (%r15,%r8), %r12
+; CHECK-NEXT: addq %r12, %r12
+; CHECK-NEXT: addq %rsi, %r12
; CHECK-NEXT: movq X(%rip), %rcx
; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rbx, %r10
@@ -171,9 +171,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %r11, %rcx
; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: leaq (%rsi,%r15), %rbx
-; CHECK-NEXT: leaq (%r12,%rbx), %rax
-; CHECK-NEXT: addq %rbx, %rax
+; CHECK-NEXT: leaq (%rsi,%r15), %rax
+; CHECK-NEXT: addq %rax, %rax
+; CHECK-NEXT: addq %r12, %rax
; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r10, %rcx
; CHECK-NEXT: addq %rcx, %r8
@@ -184,9 +184,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: addq %rcx, %rbx
-; CHECK-NEXT: leaq (%r12,%rsi), %rdx
-; CHECK-NEXT: leaq (%rax,%rdx), %rcx
-; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: leaq (%r12,%rsi), %rcx
+; CHECK-NEXT: addq %rcx, %rcx
+; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: movq X(%rip), %rdx
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rbx, %r15
@@ -197,9 +197,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: addq %r9, %rdx
; CHECK-NEXT: addq %rbx, %rdx
-; CHECK-NEXT: leaq (%rax,%r12), %r9
-; CHECK-NEXT: leaq (%rcx,%r9), %rbx
-; CHECK-NEXT: addq %r9, %rbx
+; CHECK-NEXT: leaq (%rax,%r12), %rbx
+; CHECK-NEXT: addq %rbx, %rbx
+; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: addq %r15, %rdx
; CHECK-NEXT: addq %rdx, %rsi
; CHECK-NEXT: addq %rcx, %rbx
@@ -211,12 +211,12 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: addq %rdi, %rdx
; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: leaq (%rbx,%rcx), %rdi
-; CHECK-NEXT: addq %rcx, %rdi
-; CHECK-NEXT: addq %rbx, %rdi
+; CHECK-NEXT: addq %rcx, %rcx
+; CHECK-NEXT: addq %rbx, %rcx
+; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: addq %rsi, %rdx
; CHECK-NEXT: addq %rdx, %r12
-; CHECK-NEXT: addq %rdx, %rdi
+; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: bswapq %rax
@@ -225,7 +225,7 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: addq %r12, %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
diff --git a/llvm/test/CodeGen/X86/lea-opt2.ll b/llvm/test/CodeGen/X86/lea-opt2.ll
index 3ec68fbcd7c5f..4d184f7d2eeae 100644
--- a/llvm/test/CodeGen/X86/lea-opt2.ll
+++ b/llvm/test/CodeGen/X86/lea-opt2.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s
; This file tests following optimization
;
@@ -11,15 +11,14 @@
; subl %edx, %ecx
; subl %eax, %ecx
-; TODO: replace lea with sub.
; C - (A + B) --> C - A - B
define i32 @test1(i32* %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: leal (%rdx,%rax), %esi
-; CHECK-NEXT: subl %esi, %ecx
+; CHECK-NEXT: subl %edx, %ecx
+; CHECK-NEXT: subl %eax, %ecx
; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: subl %edx, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
@@ -32,16 +31,15 @@ entry:
ret i32 %sub1
}
-; TODO: replace lea with add.
; (A + B) + C --> C + A + B
define i32 @test2(i32* %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: leal (%rax,%rdx), %esi
-; CHECK-NEXT: addl %ecx, %esi
-; CHECK-NEXT: movl %esi, (%rdi)
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: subl %edx, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
@@ -53,16 +51,15 @@ entry:
ret i32 %sub1
}
-; TODO: replace lea with add.
; C + (A + B) --> C + A + B
define i32 @test3(i32* %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: leal (%rax,%rdx), %esi
-; CHECK-NEXT: addl %ecx, %esi
-; CHECK-NEXT: movl %esi, (%rdi)
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: subl %edx, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
@@ -95,13 +92,12 @@ entry:
ret i32 %sub1
}
-; TODO: replace lea with sub.
define i64 @test5(i64* %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: leaq (%rdx,%rax), %rsi
-; CHECK-NEXT: subq %rsi, %rcx
+; CHECK-NEXT: subq %rdx, %rcx
+; CHECK-NEXT: subq %rax, %rcx
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: subq %rdx, %rax
; CHECK-NEXT: retq
@@ -114,14 +110,13 @@ entry:
ret i64 %sub1
}
-; TODO: replace lea with add.
define i64 @test6(i64* %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: leaq (%rdx,%rax), %rsi
-; CHECK-NEXT: addq %rcx, %rsi
-; CHECK-NEXT: movq %rsi, (%rdi)
+; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: subq %rdx, %rax
; CHECK-NEXT: retq
entry:
@@ -133,14 +128,13 @@ entry:
ret i64 %sub1
}
-; TODO: replace lea with add.
define i64 @test7(i64* %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: leaq (%rdx,%rax), %rsi
-; CHECK-NEXT: addq %rcx, %rsi
-; CHECK-NEXT: movq %rsi, (%rdi)
+; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: subq %rdx, %rax
; CHECK-NEXT: retq
entry:
@@ -152,3 +146,118 @@ entry:
ret i64 %sub1
}
+; The sub instruction generated flags is used by following branch,
+; so it should not be transformed.
+define i64 @test8(i64* %p, i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: leaq (%rdx,%rax), %rsi
+; CHECK-NEXT: subq %rsi, %rcx
+; CHECK-NEXT: ja .LBB7_2
+; CHECK-NEXT: # %bb.1: # %then
+; CHECK-NEXT: movq %rcx, (%rdi)
+; CHECK-NEXT: subq %rdx, %rax
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB7_2: # %else
+; CHECK-NEXT: movq $0, (%rdi)
+; CHECK-NEXT: subq %rdx, %rax
+; CHECK-NEXT: retq
+entry:
+ %ld = load i64, i64* %p, align 8
+ %0 = add i64 %b, %ld
+ %sub = sub i64 %c, %0
+ %cond = icmp ule i64 %c, %0
+ br i1 %cond, label %then, label %else
+
+then:
+ store i64 %sub, i64* %p, align 8
+ br label %endif
+
+else:
+ store i64 0, i64* %p, align 8
+ br label %endif
+
+endif:
+ %sub1 = sub i64 %ld, %b
+ ret i64 %sub1
+}
+
+; PR50615
+; The sub register usage of lea dest should block the transformation.
+define void @test9(i64 %p, i64 %s) {
+; CHECK-LABEL: test9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: leaq (%rsi,%rdi), %rax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl $4095, %eax # imm = 0xFFF
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: shlq $12, %rcx
+; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: andq $-4096, %rcx # imm = 0xF000
+; CHECK-NEXT: addq %rcx, %rdi
+; CHECK-NEXT: jmp bar@PLT # TAILCALL
+entry:
+ %add = add i64 %s, %p
+ %rem = and i64 %add, 4095
+ %cmp.not = icmp eq i64 %rem, 0
+ %add18 = select i1 %cmp.not, i64 0, i64 4096
+ %div9 = add i64 %add18, %add
+ %mul = and i64 %div9, -4096
+ %add2 = add i64 %mul, %p
+ tail call void @bar(i64 %add2, i64 %s)
+ ret void
+}
+
+define void @test10() {
+; CHECK-LABEL: test10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl (%rax), %eax
+; CHECK-NEXT: movzwl (%rax), %ecx
+; CHECK-NEXT: leal (%rcx,%rcx,2), %esi
+; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: subl %ecx, %edi
+; CHECK-NEXT: subl %ecx, %edi
+; CHECK-NEXT: negl %esi
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl $4, %eax
+; CHECK-NEXT: movl %edi, (%rax)
+; CHECK-NEXT: movl %esi, (%rax)
+; CHECK-NEXT: cmovnel %eax, %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: sarl %cl, %esi
+; CHECK-NEXT: movl %esi, (%rax)
+; CHECK-NEXT: retq
+entry:
+ %tmp = load i32, i32* undef, align 4
+ %tmp3 = sdiv i32 undef, 6
+ %tmp4 = load i32, i32* undef, align 4
+ %tmp5 = icmp eq i32 %tmp4, 4
+ %tmp6 = select i1 %tmp5, i32 %tmp3, i32 %tmp
+ %tmp10 = load i16, i16* undef, align 2
+ %tmp11 = zext i16 %tmp10 to i32
+ %tmp13 = zext i16 undef to i32
+ %tmp15 = load i16, i16* undef, align 2
+ %tmp16 = zext i16 %tmp15 to i32
+ %tmp19 = shl nsw i32 undef, 1
+ %tmp25 = shl nsw i32 undef, 1
+ %tmp26 = add nsw i32 %tmp25, %tmp13
+ %tmp28 = shl nsw i32 undef, 1
+ %tmp29 = add nsw i32 %tmp28, %tmp16
+ %tmp30 = sub nsw i32 %tmp19, %tmp29
+ %tmp31 = sub nsw i32 %tmp11, %tmp26
+ %tmp32 = shl nsw i32 %tmp30, 1
+ %tmp33 = add nsw i32 %tmp32, %tmp31
+ store i32 %tmp33, i32* undef, align 4
+ %tmp34 = mul nsw i32 %tmp31, -2
+ %tmp35 = add nsw i32 %tmp34, %tmp30
+ store i32 %tmp35, i32* undef, align 4
+ %tmp36 = select i1 %tmp5, i32 undef, i32 undef
+ %tmp38 = load i32, i32* undef, align 4
+ %tmp39 = ashr i32 %tmp38, %tmp6
+ store i32 %tmp39, i32* undef, align 4
+ ret void
+}
+
+declare void @bar(i64, i64)
+
diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
index 05f26a5ad56b1..8ad85406b9d6a 100644
--- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
+++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
@@ -53,9 +53,9 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: kmovw %k1, %ecx
; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: movw %cx, (%esi)
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movw %ax, (%esi)
; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -107,10 +107,10 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
; X64-NEXT: kmovw %k1, %ebx
; X64-NEXT: addl %edi, %eax
; X64-NEXT: addl %ecx, %edx
-; X64-NEXT: leal (%rbx,%rsi), %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: addl %edx, %ecx
-; X64-NEXT: movw %cx, (%r14)
+; X64-NEXT: addl %ebx, %eax
+; X64-NEXT: addl %esi, %eax
+; X64-NEXT: addl %edx, %eax
+; X64-NEXT: movw %ax, (%r14)
; X64-NEXT: leaq -16(%rbp), %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r14
More information about the llvm-commits
mailing list