[llvm] r311943 - [AArch64] Adjust the cost model for Exynos M1 and M2
Evandro Menezes via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 28 15:51:32 PDT 2017
Author: evandro
Date: Mon Aug 28 15:51:32 2017
New Revision: 311943
URL: http://llvm.org/viewvc/llvm-project?rev=311943&view=rev
Log:
[AArch64] Adjust the cost model for Exynos M1 and M2
Add a new predicate to more accurately model the cost of arithmetic and
logical operations whose operand is shifted left.
Differential revision: https://reviews.llvm.org/D37151
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=311943&r1=311942&r2=311943&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Aug 28 15:51:32 2017
@@ -673,8 +673,9 @@ static bool canBeExpandedToORR(const Mac
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI.isAsCheapAsAMove();
-
- unsigned Imm;
+ if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
+ isExynosShiftLeftFast(MI))
+ return true;
switch (MI.getOpcode()) {
default:
@@ -685,17 +686,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
case AArch64::ADDXri:
case AArch64::SUBWri:
case AArch64::SUBXri:
- return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
- MI.getOperand(3).getImm() == 0);
-
- // add/sub on register with shift
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- Imm = MI.getOperand(3).getImm();
- return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
- AArch64_AM::getArithShiftValue(Imm) < 4);
+ return (MI.getOperand(3).getImm() == 0);
// logical ops on immediate
case AArch64::ANDWri:
@@ -721,24 +712,6 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
case AArch64::ORRXrr:
return true;
- // logical ops on register with shift
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- Imm = MI.getOperand(3).getImm();
- return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
- AArch64_AM::getShiftValue(Imm) < 4 &&
- AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
-
// If MOVi32imm or MOVi64imm can be expanded into ORRWri or
// ORRXri, it is as cheap as MOV
case AArch64::MOVi32imm:
@@ -761,6 +734,74 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
+bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
+ unsigned Imm, Shift;
+ // Classify by scheduling class; opcodes not listed below are never fast.
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+
+ // WriteI: add/sub on immediate is always reported as fast.
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ case AArch64::ADDWri:
+ case AArch64::ADDXri:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ case AArch64::SUBWri:
+ case AArch64::SUBXri:
+ return true;
+
+ // WriteISReg: fast with no shift, or with LSL by at most 3.
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ case AArch64::EONWrs:
+ case AArch64::EONXrs:
+ case AArch64::EORWrs:
+ case AArch64::EORXrs:
+ case AArch64::ORNWrs:
+ case AArch64::ORNXrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ Imm = MI.getOperand(3).getImm();
+ Shift = AArch64_AM::getShiftValue(Imm);
+ return (Shift == 0 ||
+ (Shift <= 3 && AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL));
+
+ // WriteIEReg: operand 3 packs extend type and shift; decode both halves.
+ case AArch64::ADDSWrx:
+ case AArch64::ADDSXrx:
+ case AArch64::ADDSXrx64:
+ case AArch64::ADDWrx:
+ case AArch64::ADDXrx:
+ case AArch64::ADDXrx64:
+ case AArch64::SUBSWrx:
+ case AArch64::SUBSXrx:
+ case AArch64::SUBSXrx64:
+ case AArch64::SUBWrx:
+ case AArch64::SUBXrx:
+ case AArch64::SUBXrx64:
+ Imm = MI.getOperand(3).getImm();
+ Shift = AArch64_AM::getArithShiftValue(Imm);
+ return (Shift == 0 ||
+ (Shift <= 3 && AArch64_AM::getArithExtendType(Imm) == AArch64_AM::UXTX));
+ }
+}
+
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=311943&r1=311942&r2=311943&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Mon Aug 28 15:51:32 2017
@@ -367,6 +367,9 @@ public:
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
unsigned CallClass) const override;
+ /// Returns true for add/sub on immediate and for arithmetic or logical ops
+ /// whose register operand has no shift or is shifted left by at most 3.
+ bool isExynosShiftLeftFast(const MachineInstr &MI) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
bool isFalkorShiftExtFast(const MachineInstr &MI) const;
Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td?rev=311943&r1=311942&r2=311943&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td Mon Aug 28 15:51:32 2017
@@ -62,16 +62,25 @@ let SchedModel = ExynosM1Model in {
let SchedModel = ExynosM1Model in {
//===----------------------------------------------------------------------===//
-// Coarse scheduling model for the Exynos-M1.
+// Predicates.
+
+def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
+def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
-def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
+def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
+def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5,
M1WriteA1]>,
SchedVar<NoSchedPred, [M1WriteL5]>]>;
@@ -85,7 +94,6 @@ def M1WriteSX : SchedWriteVariant<[Sched
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
-def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
// Branch instructions.
// NOTE: Unconditional direct branches actually take neither cycles nor units.
@@ -94,7 +102,6 @@ def : WriteRes<WriteBrReg, [M1UnitC]> {
// Arithmetic and logical integer instructions.
def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
-// TODO: Shift over 3 and some extensions take 2 cycles.
def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
@@ -110,7 +117,6 @@ def : WriteRes<WriteID64, [M1UnitC,
M1UnitD]> { let Latency = 21;
let ResourceCycles = [1, 21]; }
// TODO: Long multiplication take 5 cycles and also the ALU.
-// TODO: Multiplication with accumulation can be advanced.
def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
// TODO: 64-bit multiplication has a throughput of 1/2.
def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; }
@@ -119,8 +125,10 @@ def : WriteRes<WriteIM64, [M1UnitC]> { l
def : WriteRes<WriteExtr, [M1UnitALU,
M1UnitALU]> { let Latency = 2; }
+// Addressing modes.
// TODO: The latency for the post or pre register is 1 cycle.
def : WriteRes<WriteAdr, []> { let Latency = 0; }
+def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
// Load instructions.
def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
@@ -164,12 +172,10 @@ def : WriteRes<WriteSys, []> { let L
// Generic fast forwarding.
// TODO: Add FP register forwarding rules.
-
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
def : ReadAdvance<ReadIM, 0>;
-// Integer multiply-accumulate.
// TODO: The forwarding for WriteIM64 saves actually 3 cycles.
def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID, 0>;
@@ -178,7 +184,7 @@ def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
//===----------------------------------------------------------------------===//
-// Finer scheduling model for the Exynos-M1.
+// Finer scheduling model.
def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
M1UnitNALU,
@@ -287,7 +293,6 @@ def M1WriteVLDN : SchedWriteRes<[M1Un
M1UnitL,
M1UnitL]> { let Latency = 14;
let ResourceCycles = [7]; }
-
def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
@@ -340,7 +345,6 @@ def M1WriteVSTI : SchedWriteRes<[M1Un
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
-// NOTE: Conditional branch and link adds a B uop.
def : InstRW<[M1WriteA1], (instrs BL)>;
// NOTE: Indirect branch and link with LR adds an ALU uop.
def : InstRW<[M1WriteA1,
@@ -351,6 +355,7 @@ def : InstRW<[M1WriteC1,
// Arithmetic and logical integer instructions.
def : InstRW<[M1WriteA1], (instrs COPY)>;
+def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
// Divide and multiply instructions.
@@ -413,10 +418,12 @@ def : InstRW<[M1WriteNMISC4], (instregex
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>;
-def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>;
-def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
-def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>;
+def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^S[LR]I[dv]")>; // SLI and SRI.
+def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>;
// ASIMD FP instructions.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
More information about the llvm-commits mailing list