[llvm] c69af70 - [AArch64] Adds SUBS and ADDS instructions to the MIPeepholeOpt.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 19 07:35:58 PST 2022
Author: Micah Weston
Date: 2022-02-19T15:35:53Z
New Revision: c69af70f02f200c1c443cbd8f43b1bc9fb59cced
URL: https://github.com/llvm/llvm-project/commit/c69af70f02f200c1c443cbd8f43b1bc9fb59cced
DIFF: https://github.com/llvm/llvm-project/commit/c69af70f02f200c1c443cbd8f43b1bc9fb59cced.diff
LOG: [AArch64] Adds SUBS and ADDS instructions to the MIPeepholeOpt.
Implements ADDS/SUBS 24-bit immediate optimization using the
MIPeepholeOpt pass. This follows the pattern:
Optimize ([adds|subs] r, imm) -> ([ADDS|SUBS] ([ADD|SUB] r, #imm0, lsl #12), #imm1),
if imm == (imm0<<12)+imm1, and both imm0 and imm1 are non-zero 12-bit unsigned
integers.
Optimize ([adds|subs] r, imm) -> ([SUBS|ADDS] ([SUB|ADD] r, #imm0, lsl #12), #imm1),
if imm == -(imm0<<12)-imm1, and both imm0 and imm1 are non-zero 12-bit unsigned
integers.
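As a minimal sketch of the split (illustrative only; splitAddSubImmSketch is a
made-up name, and the patch's real splitAddSubImm helper also takes the
register size, but it has the same shape):

  // Returns true if Imm decomposes as Imm == (Imm0 << 12) + Imm1 with both
  // halves non-zero 12-bit values, i.e. it fits two shifted-immediate ops.
  template <typename T>
  static bool splitAddSubImmSketch(T Imm, T &Imm0, T &Imm1) {
    Imm0 = (Imm >> 12) & 0xfff; // part encoded with "lsl #12"
    Imm1 = Imm & 0xfff;         // unshifted low part
    // Both parts must be non-zero (otherwise a single instruction
    // suffices), and no bits above bit 23 may be set.
    return Imm0 != 0 && Imm1 != 0 && (Imm >> 24) == 0;
  }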
The SplitAndOpcFunc type's return type had to change to a pair of opcodes, so
that the first add/sub can be the regular instruction and the second the
flag-setting instruction. This required updating the code in the AND case.
Testing:
I ran a two-stage bootstrap with this code.
Using the second-stage compiler, I verified that negating an ADDS to a SUBS
(or vice versa) is a valid optimization, e.g. for V == -0x111111.
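As a worked check of that example (a hedged standalone sketch, not code from
the patch): for V == -0x111111, -V == (0x111 << 12) + 0x111, so an ADDS of V
becomes a SUB of the shifted part followed by a flag-setting SUBS of the low
part. Only the N and Z flags survive this rewrite, which is why the pass
restricts it to EQ/NE users.

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t V = -0x111111;
    uint64_t Neg = static_cast<uint64_t>(-V);
    uint64_t Imm0 = (Neg >> 12) & 0xfff; // 0x111, encoded as "#0x111, lsl #12"
    uint64_t Imm1 = Neg & 0xfff;         // 0x111
    assert(Imm0 == 0x111 && Imm1 == 0x111);
    // adds x0, x1, V  ==>  sub x8, x1, #0x111, lsl #12 ; subs x0, x8, #0x111
    int64_t R = 0x123456;
    assert(R - int64_t(Imm0 << 12) - int64_t(Imm1) == R + V);
    return 0;
  }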
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D118663
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
llvm/test/CodeGen/AArch64/addsub.ll
llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e80a9ae7c0ee..84469dd257ca 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1547,27 +1547,6 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
}
}
-namespace {
-
-struct UsedNZCV {
- bool N = false;
- bool Z = false;
- bool C = false;
- bool V = false;
-
- UsedNZCV() = default;
-
- UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
- this->N |= UsedFlags.N;
- this->Z |= UsedFlags.Z;
- this->C |= UsedFlags.C;
- this->V |= UsedFlags.V;
- return *this;
- }
-};
-
-} // end anonymous namespace
-
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
@@ -1622,15 +1601,15 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
return UsedFlags;
}
-/// \returns Conditions flags used after \p CmpInstr in its MachineBB if they
-/// are not containing C or V flags and NZCV flags are not alive in successors
-/// of the same \p CmpInstr and \p MI parent. \returns None otherwise.
+/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
+/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
+/// \returns None otherwise.
///
/// Collect instructions using that flags in \p CCUseInstrs if provided.
-static Optional<UsedNZCV>
-examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
- const TargetRegisterInfo &TRI,
- SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
+Optional<UsedNZCV>
+llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
+ const TargetRegisterInfo &TRI,
+ SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
MachineBasicBlock *CmpParent = CmpInstr.getParent();
if (MI.getParent() != CmpParent)
return None;
@@ -1652,8 +1631,6 @@ examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
break;
}
- if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
- return None;
return NZCVUsedAfterCmp;
}
@@ -1684,7 +1661,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
return false;
- if (!examineCFlagsUse(MI, CmpInstr, TRI))
+ Optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
+ if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
return false;
AccessKind AccessToCheck = AK_Write;
@@ -1773,7 +1751,7 @@ static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
// Condition flags are not used in CmpInstr basic block successors and only
// Z or N flags allowed to be used after CmpInstr within its basic block
- if (!NZCVUsedAfterCmp)
+ if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
return false;
// Z or N flag used after CmpInstr must correspond to the flag used in MI
if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index b522230496d2..55b1813f0b30 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -362,6 +362,33 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
const MachineRegisterInfo *MRI) const;
};
+struct UsedNZCV {
+ bool N = false;
+ bool Z = false;
+ bool C = false;
+ bool V = false;
+
+ UsedNZCV() = default;
+
+ UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
+ this->N |= UsedFlags.N;
+ this->Z |= UsedFlags.Z;
+ this->C |= UsedFlags.C;
+ this->V |= UsedFlags.V;
+ return *this;
+ }
+};
+
+/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
+/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
+/// \returns None otherwise.
+///
+/// Collect instructions using that flags in \p CCUseInstrs if provided.
+Optional<UsedNZCV>
+examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
+ const TargetRegisterInfo &TRI,
+ SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);
+
/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 1fc5617b49f6..bfee78d75151 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -60,12 +60,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;
+ using OpcodePair = std::pair<unsigned, unsigned>;
template <typename T>
using SplitAndOpcFunc =
- std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
+ std::function<Optional<OpcodePair>(T, unsigned, T &, T &)>;
using BuildMIFunc =
- std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
- Register, Register)>;
+ std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
+ Register, Register, Register)>;
/// For instructions where an immediate operand could be split into two
/// separate immediate instructions, use the splitTwoPartImm two handle the
@@ -93,6 +94,10 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
template <typename T>
+ bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+
+ template <typename T>
bool visitAND(unsigned Opc, MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
bool visitORR(MachineInstr &MI,
@@ -171,20 +176,20 @@ bool AArch64MIPeepholeOpt::visitAND(
return splitTwoPartImm<T>(
MI, ToBeRemoved,
- [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
+ [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
- return Opc;
+ return std::make_pair(Opc, Opc);
return None;
},
- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0);
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1);
});
@@ -273,23 +278,64 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
return splitTwoPartImm<T>(
MI, ToBeRemoved,
[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
- T &Imm1) -> Optional<unsigned> {
+ T &Imm1) -> Optional<OpcodePair> {
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
- return PosOpc;
+ return std::make_pair(PosOpc, PosOpc);
if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
- return NegOpc;
+ return std::make_pair(NegOpc, NegOpc);
return None;
},
- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
+ unsigned Imm1, Register SrcReg, Register NewTmpReg,
+ Register NewDstReg) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
+ .addReg(SrcReg)
+ .addImm(Imm0)
+ .addImm(12);
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
+ .addReg(NewTmpReg)
+ .addImm(Imm1)
+ .addImm(0);
+ });
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::visitADDSSUBS(
+ OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ // Try the same transformation as ADDSUB but with additional requirement
+ // that the condition code usages are only for Equal and Not Equal
+ return splitTwoPartImm<T>(
+ MI, ToBeRemoved,
+ [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
+ T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
+ OpcodePair OP;
+ if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
+ OP = PosOpcs;
+ else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
+ OP = NegOpcs;
+ else
+ return None;
+ // Check conditional uses last since it is expensive for scanning
+ // proceeding instructions
+ MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
+ Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
+ if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
+ return None;
+ return OP;
+ },
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0)
.addImm(12);
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1)
.addImm(0);
@@ -357,33 +403,57 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
// number since it was sign extended when we assign to the 64-bit Imm.
if (SubregToRegMI)
Imm &= 0xFFFFFFFF;
- unsigned Opcode;
+ OpcodePair Opcode;
if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
Opcode = R.getValue();
else
return false;
- // Create new ADD/SUB MIs.
+ // Create new MIs using the first and second opcodes. Opcodes might differ for
+ // flag setting operations that should only set flags on second instruction.
+ // NewTmpReg = Opcode.first SrcReg Imm0
+ // NewDstReg = Opcode.second NewTmpReg Imm1
+
+ // Determine register classes for destinations and register operands
MachineFunction *MF = MI.getMF();
- const TargetRegisterClass *RC =
- TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
- const TargetRegisterClass *ORC =
- TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
+ const TargetRegisterClass *FirstInstrDstRC =
+ TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
+ const TargetRegisterClass *FirstInstrOperandRC =
+ TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
+ const TargetRegisterClass *SecondInstrDstRC =
+ (Opcode.first == Opcode.second)
+ ? FirstInstrDstRC
+ : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
+ const TargetRegisterClass *SecondInstrOperandRC =
+ (Opcode.first == Opcode.second)
+ ? FirstInstrOperandRC
+ : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
+
+ // Get old registers destinations and new register destinations
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- Register NewTmpReg = MRI->createVirtualRegister(RC);
- Register NewDstReg = MRI->createVirtualRegister(RC);
-
- MRI->constrainRegClass(SrcReg, RC);
- MRI->constrainRegClass(NewTmpReg, ORC);
- MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
-
+ Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
+ // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
+ // reuse that same destination register.
+ Register NewDstReg = DstReg.isVirtual()
+ ? MRI->createVirtualRegister(SecondInstrDstRC)
+ : DstReg;
+
+ // Constrain registers based on their new uses
+ MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
+ MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
+ if (DstReg != NewDstReg)
+ MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+
+ // Call the delegating operation to build the instruction
BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
- MRI->replaceRegWith(DstReg, NewDstReg);
// replaceRegWith changes MI's definition register. Keep it for SSA form until
- // deleting MI.
- MI.getOperand(0).setReg(DstReg);
+ // deleting MI. Only if we made a new destination register.
+ if (DstReg != NewDstReg) {
+ MRI->replaceRegWith(DstReg, NewDstReg);
+ MI.getOperand(0).setReg(DstReg);
+ }
// Record the MIs need to be removed.
ToBeRemoved.insert(&MI);
@@ -439,6 +509,26 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
ToBeRemoved);
break;
+ case AArch64::ADDSWrr:
+ Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
+ {AArch64::SUBWri, AArch64::SUBSWri},
+ MI, ToBeRemoved);
+ break;
+ case AArch64::SUBSWrr:
+ Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
+ {AArch64::ADDWri, AArch64::ADDSWri},
+ MI, ToBeRemoved);
+ break;
+ case AArch64::ADDSXrr:
+ Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
+ {AArch64::SUBXri, AArch64::SUBSXri},
+ MI, ToBeRemoved);
+ break;
+ case AArch64::SUBSXrr:
+ Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
+ {AArch64::ADDXri, AArch64::ADDSXri},
+ MI, ToBeRemoved);
+ break;
}
}
}
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index b95c15ac6d07..dd36bf6e8d35 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu -verify-machineinstrs | FileCheck %s
; Note that this should be refactored (for efficiency if nothing else)
; when the PCS is implemented so we don't have to worry about the
@@ -406,4 +406,290 @@ define i64 @addl_0x80000000(i64 %a) {
ret i64 %b
}
-; TODO: adds/subs
+; ADDS and SUBS Optimizations
+; Checks with all types first, then checks that only EQ and NE optimize
+define i1 @eq_i(i32 %0) {
+; CHECK-LABEL: eq_i:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmp w8, #273
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %2 = icmp eq i32 %0, 1118481
+ ret i1 %2
+}
+
+define i1 @eq_l(i64 %0) {
+; CHECK-LABEL: eq_l:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub x8, x0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmp x8, #273
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %2 = icmp eq i64 %0, 1118481
+ ret i1 %2
+}
+
+define i1 @ne_i(i32 %0) {
+; CHECK-LABEL: ne_i:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmp w8, #273
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %2 = icmp ne i32 %0, 1118481
+ ret i1 %2
+}
+
+define i1 @ne_l(i64 %0) {
+; CHECK-LABEL: ne_l:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub x8, x0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmp x8, #273
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %2 = icmp ne i64 %0, 1118481
+ ret i1 %2
+}
+
+define i1 @eq_in(i32 %0) {
+; CHECK-LABEL: eq_in:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmn w8, #273
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %2 = icmp eq i32 %0, -1118481
+ ret i1 %2
+}
+
+define i1 @eq_ln(i64 %0) {
+; CHECK-LABEL: eq_ln:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmn x8, #273
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %2 = icmp eq i64 %0, -1118481
+ ret i1 %2
+}
+
+define i1 @ne_in(i32 %0) {
+; CHECK-LABEL: ne_in:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmn w8, #273
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %2 = icmp ne i32 %0, -1118481
+ ret i1 %2
+}
+
+define i1 @ne_ln(i64 %0) {
+; CHECK-LABEL: ne_ln:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, #273, lsl #12 // =1118208
+; CHECK-NEXT: cmn x8, #273
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %2 = icmp ne i64 %0, -1118481
+ ret i1 %2
+}
+
+define i1 @reject_eq(i32 %0) {
+; CHECK-LABEL: reject_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #51712
+; CHECK-NEXT: movk w8, #15258, lsl #16
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %2 = icmp eq i32 %0, 1000000000
+ ret i1 %2
+}
+
+define i1 @reject_non_eqne_csinc(i32 %0) {
+; CHECK-LABEL: reject_non_eqne_csinc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #4369
+; CHECK-NEXT: movk w8, #17, lsl #16
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i32 %0, 1118481
+ ret i1 %2
+}
+
+define i32 @accept_csel(i32 %0) {
+; CHECK-LABEL: accept_csel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w9, w0, #273, lsl #12 // =1118208
+; CHECK-NEXT: mov w8, #17
+; CHECK-NEXT: cmp w9, #273
+; CHECK-NEXT: mov w9, #11
+; CHECK-NEXT: csel w0, w9, w8, eq
+; CHECK-NEXT: ret
+ %2 = icmp eq i32 %0, 1118481
+ %3 = select i1 %2, i32 11, i32 17
+ ret i32 %3
+}
+
+define i32 @reject_non_eqne_csel(i32 %0) {
+; CHECK-LABEL: reject_non_eqne_csel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #4369
+; CHECK-NEXT: mov w9, #11
+; CHECK-NEXT: movk w8, #17, lsl #16
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: mov w8, #17
+; CHECK-NEXT: csel w0, w9, w8, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i32 %0, 1118481
+ %3 = select i1 %2, i32 11, i32 17
+ ret i32 %3
+}
+
+declare void @fooy()
+
+define void @accept_branch(i32 %0) {
+; CHECK-LABEL: accept_branch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, #291, lsl #12 // =1191936
+; CHECK-NEXT: cmp w8, #1110
+; CHECK-NEXT: b.eq .LBB32_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: b fooy
+ %2 = icmp ne i32 %0, 1193046
+ br i1 %2, label %4, label %3
+3: ; preds = %1
+ tail call void @fooy()
+ br label %4
+4: ; preds = %3, %1
+ ret void
+}
+
+define void @reject_non_eqne_branch(i32 %0) {
+; CHECK-LABEL: reject_non_eqne_branch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #13398
+; CHECK-NEXT: movk w8, #18, lsl #16
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: b.le .LBB33_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: b fooy
+ %2 = icmp sgt i32 %0, 1193046
+ br i1 %2, label %4, label %3
+3: ; preds = %1
+ tail call void @fooy()
+ br label %4
+4: ; preds = %3, %1
+ ret void
+}
+
+define i32 @reject_multiple_usages(i32 %0) {
+; CHECK-LABEL: reject_multiple_usages:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #4369
+; CHECK-NEXT: mov w9, #3
+; CHECK-NEXT: movk w8, #17, lsl #16
+; CHECK-NEXT: mov w10, #17
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: mov w8, #9
+; CHECK-NEXT: mov w11, #12
+; CHECK-NEXT: csel w8, w8, w9, eq
+; CHECK-NEXT: csel w9, w11, w10, hi
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: mov w9, #53312
+; CHECK-NEXT: movk w9, #2, lsl #16
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: mov w9, #26304
+; CHECK-NEXT: movk w9, #1433, lsl #16
+; CHECK-NEXT: csel w0, w8, w9, hi
+; CHECK-NEXT: ret
+ %2 = icmp eq i32 %0, 1118481
+ %3 = icmp ugt i32 %0, 1118481
+ %4 = select i1 %2, i32 9, i32 3
+ %5 = select i1 %3, i32 12, i32 17
+ %6 = add i32 %4, %5
+ %7 = icmp ugt i32 %0, 184384
+ %8 = select i1 %7, i32 %6, i32 93939392
+ ret i32 %8
+}
+
+; Unique case found in ClangBuiltLinux where the DstReg is not Virtual and
+; caused an assertion failure
+define dso_local i32 @neigh_periodic_work_tbl_1() {
+; CHECK-LABEL: neigh_periodic_work_tbl_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, neigh_periodic_work_tbl_1
+; CHECK-NEXT: add x8, x8, :lo12:neigh_periodic_work_tbl_1
+; CHECK-NEXT: add x8, x8, #18, lsl #12 // =73728
+; CHECK-NEXT: cmn x8, #1272
+; CHECK-NEXT: b.pl .LBB35_2
+; CHECK-NEXT: .LBB35_1: // %for.cond
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: b .LBB35_1
+; CHECK-NEXT: .LBB35_2: // %if.end
+; CHECK-NEXT: ret
+entry:
+ br i1 icmp slt (i64 add (i64 ptrtoint (i32 ()* @neigh_periodic_work_tbl_1 to i64), i64 75000), i64 0), label %for.cond, label %if.end
+for.cond: ; preds = %entry, %for.cond
+ br label %for.cond
+if.end: ; preds = %entry
+ ret i32 undef
+}
+
+@jiffies = dso_local local_unnamed_addr global i32 0, align 4
+@primary_crng = dso_local local_unnamed_addr global i32 0, align 4
+@input_pool = dso_local global i32 0, align 4
+declare dso_local i32 @crng_reseed(...) local_unnamed_addr
+; Function Attrs: nounwind uwtable
+define dso_local i32 @_extract_crng_crng() {
+; CHECK-LABEL: _extract_crng_crng:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: adrp x8, _extract_crng_crng
+; CHECK-NEXT: add x8, x8, :lo12:_extract_crng_crng
+; CHECK-NEXT: tbnz x8, #63, .LBB36_2
+; CHECK-NEXT: // %bb.1: // %lor.lhs.false
+; CHECK-NEXT: adrp x9, jiffies
+; CHECK-NEXT: ldrsw x9, [x9, :lo12:jiffies]
+; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: add x8, x8, #18, lsl #12 // =73728
+; CHECK-NEXT: cmn x8, #1272
+; CHECK-NEXT: b.pl .LBB36_3
+; CHECK-NEXT: .LBB36_2: // %if.then
+; CHECK-NEXT: adrp x8, primary_crng
+; CHECK-NEXT: adrp x9, input_pool
+; CHECK-NEXT: add x9, x9, :lo12:input_pool
+; CHECK-NEXT: ldr w8, [x8, :lo12:primary_crng]
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: csel x0, xzr, x9, eq
+; CHECK-NEXT: bl crng_reseed
+; CHECK-NEXT: .LBB36_3: // %if.end
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ br i1 icmp slt (i32 ()* @_extract_crng_crng, i32 ()* null), label %if.then, label %lor.lhs.false
+lor.lhs.false: ; preds = %entry
+ %0 = load i32, i32* @jiffies, align 4
+ %idx.ext = sext i32 %0 to i64
+ %idx.neg = sub nsw i64 0, %idx.ext
+ %add.ptr = getelementptr i8, i8* getelementptr (i8, i8* bitcast (i32 ()* @_extract_crng_crng to i8*), i64 75000), i64 %idx.neg
+ %cmp = icmp slt i8* %add.ptr, null
+ br i1 %cmp, label %if.then, label %if.end
+if.then: ; preds = %lor.lhs.false, %entry
+ %1 = load i32, i32* @primary_crng, align 4
+ %tobool.not = icmp eq i32 %1, 0
+ %cond = select i1 %tobool.not, i32* null, i32* @input_pool
+ %call = tail call i32 bitcast (i32 (...)* @crng_reseed to i32 (i32*)*)(i32* noundef %cond)
+ br label %if.end
+if.end: ; preds = %if.then, %lor.lhs.false
+ ret i32 undef
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
index f5a4a45adbc0..0c2cf1778722 100644
--- a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
@@ -12,8 +12,8 @@
; YAML: - INST_add: '2'
; YAML: - INST_b.: '1'
; YAML: - INST_ldr: '1'
-; YAML: - INST_movk: '1'
-; YAML: - INST_movz: '1'
+; YAML: - INST_orr: '1'
+; YAML: - INST_sub: '1'
; YAML: - INST_subs: '1'
; YAML: Name: InstructionMix
@@ -27,13 +27,12 @@
define i32 @foo(i32* %ptr, i32 %x, i64 %y) !dbg !3 {
; CHECK-LABEL: foo:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: ldr w10, [x0]
+; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: mov x8, x0
-; CHECK-NEXT: mov w9, #16959
-; CHECK-NEXT: movk w9, #15, lsl #16
-; CHECK-NEXT: add w0, w10, w1
-; CHECK-NEXT: add x10, x0, x2
-; CHECK-NEXT: cmp x10, x9
+; CHECK-NEXT: add w0, w9, w1
+; CHECK-NEXT: add x9, x0, x2
+; CHECK-NEXT: sub x9, x9, #244, lsl #12 ; =999424
+; CHECK-NEXT: cmp x9, #575
; CHECK-NEXT: b.eq LBB0_2
; CHECK-NEXT: ; %bb.1: ; %else
; CHECK-NEXT: mul w9, w0, w1