[llvm] r348774 - [AArch64] Refactor the Exynos scheduling predicates
Evandro Menezes via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 10 09:17:26 PST 2018
Author: evandro
Date: Mon Dec 10 09:17:26 2018
New Revision: 348774
URL: http://llvm.org/viewvc/llvm-project?rev=348774&view=rev
Log:
[AArch64] Refactor the Exynos scheduling predicates
Refactor the scheduling predicates for the Exynos processors so that they
are based on `MCInstPredicate`.
Differential revision: https://reviews.llvm.org/D55345
Added:
llvm/trunk/lib/Target/AArch64/AArch64SchedPredExynos.td
Modified:
llvm/trunk/include/llvm/Target/TargetInstrPredicate.td
llvm/trunk/lib/Target/AArch64/AArch64.td
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM1.td
llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM3.td
llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/extended-register.s
llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/register-offset.s
llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s
Modified: llvm/trunk/include/llvm/Target/TargetInstrPredicate.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrPredicate.td?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetInstrPredicate.td (original)
+++ llvm/trunk/include/llvm/Target/TargetInstrPredicate.td Mon Dec 10 09:17:26 2018
@@ -39,7 +39,7 @@
// processor scheduling model.
//
// The `MCInstPredicateExample` definition above is equivalent (and therefore
-// could replace) the following definition from the ExynosM3 model (see
+// could replace) the following definition from a previous ExynosM3 model (see
// AArch64SchedExynosM3.td):
//
// def M3BranchLinkFastPred : SchedPredicate<[{
Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Mon Dec 10 09:17:26 2018
@@ -371,6 +371,7 @@ include "AArch64CallingConvention.td"
include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
include "AArch64SchedPredicates.td"
+include "AArch64SchedPredExynos.td"
def AArch64InstrInfo : InstrInfo;
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Dec 10 09:17:26 2018
@@ -705,7 +705,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
// Secondly, check cases specific to sub-targets.
if (Subtarget.hasExynosCheapAsMoveHandling()) {
- if (isExynosResetFast(MI) || isExynosShiftExtFast(MI))
+ if (isExynosCheapAsMove(MI))
return true;
return MI.isAsCheapAsAMove();
@@ -759,213 +759,6 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
-bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) {
- unsigned Reg, Imm, Shift;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // MOV Rd, SP
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
-
- // Literal
- case AArch64::ADR:
- case AArch64::ADRP:
- return true;
-
- // MOVI Vd, #0
- case AArch64::MOVID:
- case AArch64::MOVIv8b_ns:
- case AArch64::MOVIv2d_ns:
- case AArch64::MOVIv16b_ns:
- Imm = MI.getOperand(1).getImm();
- return (Imm == 0);
-
- // MOVI Vd, #0
- case AArch64::MOVIv2i32:
- case AArch64::MOVIv4i16:
- case AArch64::MOVIv4i32:
- case AArch64::MOVIv8i16:
- Imm = MI.getOperand(1).getImm();
- Shift = MI.getOperand(2).getImm();
- return (Imm == 0 && Shift == 0);
-
- // MOV Rd, Imm
- case AArch64::MOVNWi:
- case AArch64::MOVNXi:
-
- // MOV Rd, Imm
- case AArch64::MOVZWi:
- case AArch64::MOVZXi:
- return true;
-
- // MOV Rd, Imm
- case AArch64::ORRWri:
- case AArch64::ORRXri:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
-
- // MOV Rd, Rm
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
- }
-}
-
-bool AArch64InstrInfo::isExynosLdStExtFast(const MachineInstr &MI) {
- unsigned Imm;
- AArch64_AM::ShiftExtendType Ext;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // WriteLD
- case AArch64::PRFMroW:
- case AArch64::PRFMroX:
-
- // WriteLDIdx
- case AArch64::LDRBBroW:
- case AArch64::LDRBBroX:
- case AArch64::LDRHHroW:
- case AArch64::LDRHHroX:
- case AArch64::LDRSBWroW:
- case AArch64::LDRSBWroX:
- case AArch64::LDRSBXroW:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSHWroW:
- case AArch64::LDRSHWroX:
- case AArch64::LDRSHXroW:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSWroW:
- case AArch64::LDRSWroX:
- case AArch64::LDRWroW:
- case AArch64::LDRWroX:
- case AArch64::LDRXroW:
- case AArch64::LDRXroX:
-
- case AArch64::LDRBroW:
- case AArch64::LDRBroX:
- case AArch64::LDRDroW:
- case AArch64::LDRDroX:
- case AArch64::LDRHroW:
- case AArch64::LDRHroX:
- case AArch64::LDRSroW:
- case AArch64::LDRSroX:
-
- // WriteSTIdx
- case AArch64::STRBBroW:
- case AArch64::STRBBroX:
- case AArch64::STRHHroW:
- case AArch64::STRHHroX:
- case AArch64::STRWroW:
- case AArch64::STRWroX:
- case AArch64::STRXroW:
- case AArch64::STRXroX:
-
- case AArch64::STRBroW:
- case AArch64::STRBroX:
- case AArch64::STRDroW:
- case AArch64::STRDroX:
- case AArch64::STRHroW:
- case AArch64::STRHroX:
- case AArch64::STRSroW:
- case AArch64::STRSroX:
- Imm = MI.getOperand(3).getImm();
- Ext = AArch64_AM::getMemExtendType(Imm);
- return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
- }
-}
-
-bool AArch64InstrInfo::isExynosShiftExtFast(const MachineInstr &MI) {
- unsigned Imm, Shift;
- AArch64_AM::ShiftExtendType Ext = AArch64_AM::UXTX;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // WriteI
- case AArch64::ADDSWri:
- case AArch64::ADDSXri:
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- case AArch64::SUBSWri:
- case AArch64::SUBSXri:
- case AArch64::SUBWri:
- case AArch64::SUBXri:
- return true;
-
- // WriteISReg
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- Ext = AArch64_AM::getShiftType(Imm);
- return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
-
- // WriteIEReg
- case AArch64::ADDSWrx:
- case AArch64::ADDSXrx:
- case AArch64::ADDWrx:
- case AArch64::ADDXrx:
- case AArch64::SUBSWrx:
- case AArch64::SUBSXrx:
- case AArch64::SUBWrx:
- case AArch64::SUBXrx:
- Ext = AArch64_AM::UXTW;
- LLVM_FALLTHROUGH;
- case AArch64::ADDSXrx64:
- case AArch64::ADDXrx64:
- case AArch64::SUBSXrx64:
- case AArch64::SUBXrx64:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getArithShiftValue(Imm);
- return (Shift == 0 ||
- (Shift <= 3 && Ext == AArch64_AM::getArithExtendType(Imm)));
- }
-}
-
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Mon Dec 10 09:17:26 2018
@@ -241,15 +241,6 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
- /// Returns true if the instruction sets a constant value that can be
- /// executed more efficiently.
- static bool isExynosResetFast(const MachineInstr &MI);
- /// Returns true if the load or store has an extension that can be executed
- /// more efficiently.
- static bool isExynosLdStExtFast(const MachineInstr &MI);
- /// Returns true if the instruction has a constant shift left or extension
- /// that can be executed more efficiently.
- static bool isExynosShiftExtFast(const MachineInstr &MI);
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);
Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM1.td?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM1.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM1.td Mon Dec 10 09:17:26 2018
@@ -62,14 +62,6 @@ def M1UnitNALU : ProcResGroup<[M1UnitNAL
M1UnitNAL1]>; // All simple vector
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M1BranchLinkPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M1LdStExtPred : SchedPredicate<[{TII->isExynosLdStExtFast(*MI)}]>;
-def M1ShiftExtPred : SchedPredicate<[{TII->isExynosShiftExtFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
@@ -86,14 +78,16 @@ def M1WriteAC : SchedWriteRes<[M1UnitALU
def M1WriteAD : SchedWriteRes<[M1UnitALU,
M1UnitC]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftExtPred, [M1WriteA1]>,
- SchedVar<NoSchedPred, [M1WriteAA]>]>;
+def M1WriteAX : SchedWriteVariant<[SchedVar<ExynosExtPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
+def M1WriteAY : SchedWriteVariant<[SchedVar<ExynosShiftPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkPred, [M1WriteAB]>,
- SchedVar<NoSchedPred, [M1WriteAC]>]>;
+def M1WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M1WriteAC]>,
+ SchedVar<NoSchedPred, [M1WriteAB]>]>;
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
@@ -111,40 +105,27 @@ def M1WriteLD : SchedWriteRes<[M1UnitL,
let ResourceCycles = [2, 1]; }
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-def M1WriteLX : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLC]>]>;
-def M1WriteLY : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLD]>]>;
+def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteLC]>,
+ SchedVar<NoSchedPred, [M1WriteL5]>]>;
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
def M1WriteSA : SchedWriteRes<[M1UnitS,
M1UnitFST,
- M1UnitS,
- M1UnitFST]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSB : SchedWriteRes<[M1UnitS,
- M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 2; }
-def M1WriteSC : SchedWriteRes<[M1UnitS,
+def M1WriteSB : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitS,
M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 3; }
-def M1WriteSD : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitA]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSE : SchedWriteRes<[M1UnitS,
+def M1WriteSC : SchedWriteRes<[M1UnitS,
M1UnitA]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteSX : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSE]>]>;
-def M1WriteSY : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSB]>]>;
+def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteSC]>,
+ SchedVar<NoSchedPred, [M1WriteS1]>]>;
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
@@ -415,9 +396,9 @@ def M1WriteVSTH : SchedWriteRes<[M1Un
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [1, 7, 1, 7, 1]; }
+ M1UnitFST]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 7, 1, 7, 1]; }
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
@@ -428,9 +409,17 @@ def M1WriteVSTI : SchedWriteRes<[M1Un
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 17;
- let NumMicroOps = 7;
- let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+ M1UnitFST]> { let Latency = 17;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+
+// Special cases.
+def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
+def M1WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M1WriteNALU1]>,
+ SchedVar<NoSchedPred, [M1WriteA1]>]>;
+
+// Fast forwarding.
+def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
@@ -440,21 +429,34 @@ def : InstRW<[M1WriteC1], (instregex "^C
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M1WriteA1], (instrs COPY)>;
-def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
+def : InstRW<[M1WriteAX], (instregex ".+rx(64)?$")>;
+def : InstRW<[M1WriteAY], (instregex ".+rs$")>;
+
+// Move instructions.
+def : InstRW<[M1WriteCOPY], (instrs COPY)>;
// Divide and multiply instructions.
// Miscellaneous instructions.
// Load instructions.
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roW")>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roX")>;
def : InstRW<[M1WriteLB,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
-def : InstRW<[M1WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M1WriteSC,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
@@ -488,8 +490,10 @@ def : InstRW<[WriteVLD], (instregex "
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M1WriteLY,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
def : InstRW<[M1WriteLD,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
@@ -508,14 +512,16 @@ def : InstRW<[WriteVST], (instregex "
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M1WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
-def : InstRW<[M1WriteSB,
+def : InstRW<[M1WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
+def : InstRW<[M1WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M1WriteSC,
+def : InstRW<[M1WriteSB,
WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
@@ -609,21 +615,21 @@ def : InstRW<[M1WriteVLDE], (instregex
def : InstRW<[M1WriteVLDE,
WriteAdr], (instregex "LD1i(64)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M1WriteVLDA,
@@ -831,8 +837,6 @@ def : InstRW<[M1WriteVSTI,
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
// Cryptography instructions.
-def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM3.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM3.td?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM3.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedExynosM3.td Mon Dec 10 09:17:26 2018
@@ -104,20 +104,6 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSH
M3UnitNSHF2]>;
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M3BranchLinkPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).isReg() &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M3ResetPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
-def M3RotatePred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
- MI->getOpcode() == AArch64::EXTRXrri) &&
- MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
-def M3LdStExtPred : SchedPredicate<[{TII->isExynosLdStExtFast(*MI)}]>;
-def M3ShiftExtPred : SchedPredicate<[{TII->isExynosShiftExtFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
@@ -138,15 +124,23 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU
let NumMicroOps = 2; }
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
-def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetPred, [M3WriteZ0]>,
- SchedVar<M3ShiftExtPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
-def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotatePred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosShiftPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosExtPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAZ : SchedWriteVariant<[SchedVar<ExynosShiftPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
-def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkPred, [M3WriteAB]>,
- SchedVar<NoSchedPred, [M3WriteAC]>]>;
+def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
+ SchedVar<NoSchedPred, [M3WriteAB]>]>;
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
@@ -163,28 +157,24 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
def M3WriteLD : SchedWriteRes<[M3UnitA,
M3UnitL]> { let Latency = 4;
let NumMicroOps = 2; }
+def M3WriteLE : SchedWriteRes<[M3UnitA,
+ M3UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-
-def M3WriteLX : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteL5]>,
- SchedVar<NoSchedPred, [M3WriteLB]>]>;
+def M3WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M3WriteLB]>,
+ SchedVar<NoSchedPred, [M3WriteL5]>]>;
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
def M3WriteSA : SchedWriteRes<[M3UnitA,
M3UnitS,
- M3UnitFST]> { let Latency = 2;
+ M3UnitFST]> { let Latency = 3;
let NumMicroOps = 2; }
def M3WriteSB : SchedWriteRes<[M3UnitA,
- M3UnitS]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M3WriteSC : SchedWriteRes<[M3UnitA,
M3UnitS]> { let Latency = 2;
let NumMicroOps = 2; }
-
-def M3WriteSX : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSB]>]>;
-def M3WriteSY : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSC]>]>;
+def M3WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M3WriteSB]>,
+ SchedVar<NoSchedPred, [M3WriteS1]>]>;
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
@@ -214,9 +204,7 @@ def : WriteRes<WriteIM64, [M3UnitC]> {
let ResourceCycles = [2]; }
// Miscellaneous instructions.
-def : WriteRes<WriteExtr, [M3UnitALU,
- M3UnitALU]> { let Latency = 1;
- let NumMicroOps = 2; }
+def : SchedAlias<WriteExtr, M3WriteAY>;
// Addressing modes.
def : WriteRes<WriteAdr, []> { let Latency = 1;
@@ -479,11 +467,15 @@ def M3WriteVSTI : SchedWriteRes<[M3Un
// Special cases.
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
+def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
+ SchedVar<NoSchedPred, [M3WriteZ0]>]>;
+def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
+
+// Fast forwarding.
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
M3WriteFMAC5]>;
-def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetPred, [M3WriteZ0]>,
- SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
// Branch instructions
@@ -494,29 +486,40 @@ def : InstRW<[M3WriteC1], (instregex "^C
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M3WriteA1], (instrs COPY)>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
+def : InstRW<[M3WriteAZ], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M3WriteAZ], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
+def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
// Move instructions.
-def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
-def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+def : InstRW<[M3WriteCOPY], (instrs COPY)>;
+def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
// Divide and multiply instructions.
// Miscellaneous instructions.
-def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
// Load instructions.
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roW")>;
+def : InstRW<[M3WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roX")>;
def : InstRW<[M3WriteLD,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
-def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[M3WriteL5,
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M3WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
@@ -553,9 +556,11 @@ def : InstRW<[WriteVLD], (instregex "
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
-def : InstRW<[M3WriteLB,
+def : InstRW<[M3WriteLE,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
+def : InstRW<[M3WriteLE,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
M3WriteLH], (instregex "^LDN?P[DS]i")>;
@@ -573,8 +578,10 @@ def : InstRW<[WriteVST], (instregex "
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M3WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
+def : InstRW<[M3WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
def : InstRW<[M3WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
Added: llvm/trunk/lib/Target/AArch64/AArch64SchedPredExynos.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedPredExynos.td?rev=348774&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedPredExynos.td (added)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedPredExynos.td Mon Dec 10 09:17:26 2018
@@ -0,0 +1,124 @@
+//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 Exynos processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Identify BLR specifying the LR register as the indirect target register.
+def ExynosBranchLinkLRPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[BLR]>,
+ CheckRegOperand<0, LR>]>>;
+
+// Identify arithmetic and logic instructions without or with limited extension.
+def ExynosExtFn : TIIPredicate<
+ "isExynosExtFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithExt32Op.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckExtBy0,
+ CheckAll<
+ [CheckExtUXTW,
+ CheckAny<
+ [CheckExtBy1,
+ CheckExtBy2,
+ CheckExtBy3]>]>]>>>,
+ MCOpcodeSwitchCase<
+ IsArithExt64Op.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckExtBy0,
+ CheckAll<
+ [CheckExtUXTX,
+ CheckAny<
+ [CheckExtBy1,
+ CheckExtBy2,
+ CheckExtBy3]>]>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosExtPred : MCSchedPredicate<ExynosExtFn>;
+
+// Identify FP instructions.
+def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
+
+// Identify whether an instruction whose result is a long vector
+// operates on the upper half of the input registers.
+def ExynosLongVectorUpperFn : TIIPredicate<
+ "isExynosLongVectorUpper",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLongVectorUpperOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
+
+// Identify 128-bit NEON instructions.
+def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
+
+// Identify instructions that reset a register efficiently.
+def ExynosResetFn : TIIPredicate<
+ "isExynosResetFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ [ADR, ADRP,
+ MOVNWi, MOVNXi,
+ MOVZWi, MOVZXi],
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<
+ CheckAny<
+ [IsCopyIdiomFn,
+ IsZeroFPIdiomFn,
+ IsZeroIdiomFn]>>>>;
+def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
+
+// Identify EXTR as the alias for ROR (immediate).
+def ExynosRotateRightImmPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
+ CheckSameRegOperand<1, 2>]>>;
+
+// Identify arithmetic and logic instructions without or with limited shift.
+def ExynosShiftFn : TIIPredicate<
+ "isExynosShiftFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckShiftBy0,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckAny<
+ [CheckShiftBy1,
+ CheckShiftBy2,
+ CheckShiftBy3]>]>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosShiftPred : MCSchedPredicate<ExynosShiftFn>;
+
+// Identify more arithmetic and logic instructions with no or limited shift.
+def ExynosShiftExFn : TIIPredicate<
+ "isExynosShiftExFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAll<
+ [CheckShiftLSL,
+ CheckShiftBy8]>>>],
+ MCReturnStatement<ExynosShiftFn>>>;
+def ExynosShiftExPred : MCSchedPredicate<ExynosShiftExFn>;
+
+// Identify cheap instructions, including arithmetic and logic immediate ones.
+def ExynosCheapFn : TIIPredicate<
+ "isExynosCheapAsMove",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicImmOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<
+ CheckAny<
+ [ExynosExtFn, ExynosResetFn, ExynosShiftFn]>>>>;
Modified: llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/extended-register.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/extended-register.s?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/extended-register.s (original)
+++ llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/extended-register.s Mon Dec 10 09:17:26 2018
@@ -3,31 +3,31 @@
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
sub w0, w1, w2, sxtb #0
- add w3, w4, w5, sxth #1
+ add x3, x4, w5, sxth #1
subs x6, x7, w8, uxtw #2
adds x9, x10, x11, uxtx #3
sub w12, w13, w14, uxtb #3
- add w15, w16, w17, uxth #2
+ add x15, x16, w17, uxth #2
subs x18, x19, w20, sxtw #1
adds x21, x22, x23, sxtx #0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 800
-# EM1-NEXT: Total Cycles: 537
-# EM3-NEXT: Total Cycles: 403
+# EM1-NEXT: Total Cycles: 403
+# EM3-NEXT: Total Cycles: 303
# ALL-NEXT: Total uOps: 800
# EM1: Dispatch Width: 4
-# EM1-NEXT: uOps Per Cycle: 1.49
-# EM1-NEXT: IPC: 1.49
-# EM1-NEXT: Block RThroughput: 5.3
+# EM1-NEXT: uOps Per Cycle: 1.99
+# EM1-NEXT: IPC: 1.99
+# EM1-NEXT: Block RThroughput: 4.0
# EM3: Dispatch Width: 6
-# EM3-NEXT: uOps Per Cycle: 1.99
-# EM3-NEXT: IPC: 1.99
-# EM3-NEXT: Block RThroughput: 4.0
+# EM3-NEXT: uOps Per Cycle: 2.64
+# EM3-NEXT: IPC: 2.64
+# EM3-NEXT: Block RThroughput: 3.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
@@ -39,20 +39,20 @@
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
-# EM1-NEXT: 1 2 0.67 sub w0, w1, w2, sxtb
-# EM1-NEXT: 1 2 0.67 add w3, w4, w5, sxth #1
-# EM1-NEXT: 1 2 0.67 subs x6, x7, w8, uxtw #2
-# EM1-NEXT: 1 2 0.67 adds x9, x10, x11, uxtx #3
+# EM1-NEXT: 1 1 0.33 sub w0, w1, w2, sxtb
+# EM1-NEXT: 1 2 0.67 add x3, x4, w5, sxth #1
+# EM1-NEXT: 1 1 0.33 subs x6, x7, w8, uxtw #2
+# EM1-NEXT: 1 1 0.33 adds x9, x10, x11, uxtx #3
# EM1-NEXT: 1 2 0.67 sub w12, w13, w14, uxtb #3
-# EM1-NEXT: 1 2 0.67 add w15, w16, w17, uxth #2
+# EM1-NEXT: 1 2 0.67 add x15, x16, w17, uxth #2
# EM1-NEXT: 1 2 0.67 subs x18, x19, w20, sxtw #1
-# EM1-NEXT: 1 2 0.67 adds x21, x22, x23, sxtx
+# EM1-NEXT: 1 1 0.33 adds x21, x22, x23, sxtx
-# EM3-NEXT: 1 2 0.50 sub w0, w1, w2, sxtb
-# EM3-NEXT: 1 2 0.50 add w3, w4, w5, sxth #1
-# EM3-NEXT: 1 2 0.50 subs x6, x7, w8, uxtw #2
-# EM3-NEXT: 1 2 0.50 adds x9, x10, x11, uxtx #3
+# EM3-NEXT: 1 1 0.25 sub w0, w1, w2, sxtb
+# EM3-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1
+# EM3-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2
+# EM3-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3
# EM3-NEXT: 1 2 0.50 sub w12, w13, w14, uxtb #3
-# EM3-NEXT: 1 2 0.50 add w15, w16, w17, uxth #2
+# EM3-NEXT: 1 2 0.50 add x15, x16, w17, uxth #2
# EM3-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #1
-# EM3-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx
+# EM3-NEXT: 1 1 0.25 adds x21, x22, x23, sxtx
Modified: llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/register-offset.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/register-offset.s?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/register-offset.s (original)
+++ llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/register-offset.s Mon Dec 10 09:17:26 2018
@@ -9,20 +9,17 @@
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
-
-# EM1-NEXT: Total Cycles: 408
-# EM3-NEXT: Total Cycles: 208
-
-# ALL-NEXT: Total uOps: 800
+# ALL-NEXT: Total Cycles: 308
+# ALL-NEXT: Total uOps: 600
# EM1: Dispatch Width: 4
-# EM1-NEXT: uOps Per Cycle: 1.96
-# EM1-NEXT: IPC: 0.98
+# EM1-NEXT: uOps Per Cycle: 1.95
+# EM1-NEXT: IPC: 1.30
# EM1-NEXT: Block RThroughput: 2.0
# EM3: Dispatch Width: 6
-# EM3-NEXT: uOps Per Cycle: 3.85
-# EM3-NEXT: IPC: 1.92
+# EM3-NEXT: uOps Per Cycle: 1.95
+# EM3-NEXT: IPC: 1.30
# EM3-NEXT: Block RThroughput: 2.0
# ALL: Instruction Info:
@@ -35,12 +32,12 @@
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
-# EM1-NEXT: 2 5 1.00 * ldr w0, [x1, x2]
-# EM1-NEXT: 2 2 1.00 * str x3, [x4, w5, sxtw]
-# EM1-NEXT: 2 5 1.00 * ldr x6, [x7, w8, uxtw #3]
-# EM1-NEXT: 2 2 1.00 * str x9, [x10, x11, lsl #3]
+# EM1-NEXT: 1 5 1.00 * ldr w0, [x1, x2]
+# EM3-NEXT: 1 5 0.50 * ldr w0, [x1, x2]
-# EM3-NEXT: 2 5 0.50 * ldr w0, [x1, x2]
-# EM3-NEXT: 2 1 1.00 * str x3, [x4, w5, sxtw]
+# ALL-NEXT: 2 2 1.00 * str x3, [x4, w5, sxtw]
+
+# EM1-NEXT: 2 5 1.00 * ldr x6, [x7, w8, uxtw #3]
# EM3-NEXT: 2 5 0.50 * ldr x6, [x7, w8, uxtw #3]
-# EM3-NEXT: 2 1 1.00 * str x9, [x10, x11, lsl #3]
+
+# ALL-NEXT: 1 1 1.00 * str x9, [x10, x11, lsl #3]
Modified: llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s?rev=348774&r1=348773&r2=348774&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s (original)
+++ llvm/trunk/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s Mon Dec 10 09:17:26 2018
@@ -10,20 +10,20 @@
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
-# EM1-NEXT: Total Cycles: 271
-# EM3-NEXT: Total Cycles: 203
+# EM1-NEXT: Total Cycles: 204
+# EM3-NEXT: Total Cycles: 154
# ALL-NEXT: Total uOps: 400
# EM1: Dispatch Width: 4
-# EM1-NEXT: uOps Per Cycle: 1.48
-# EM1-NEXT: IPC: 1.48
-# EM1-NEXT: Block RThroughput: 2.7
+# EM1-NEXT: uOps Per Cycle: 1.96
+# EM1-NEXT: IPC: 1.96
+# EM1-NEXT: Block RThroughput: 2.0
# EM3: Dispatch Width: 6
-# EM3-NEXT: uOps Per Cycle: 1.97
-# EM3-NEXT: IPC: 1.97
-# EM3-NEXT: Block RThroughput: 2.0
+# EM3-NEXT: uOps Per Cycle: 2.60
+# EM3-NEXT: IPC: 2.60
+# EM3-NEXT: Block RThroughput: 1.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
@@ -35,12 +35,12 @@
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
-# EM1-NEXT: 1 2 0.67 add w0, w1, w2
+# EM1-NEXT: 1 1 0.33 add w0, w1, w2
# EM1-NEXT: 1 2 0.67 sub x3, x4, x5, lsr #1
-# EM1-NEXT: 1 2 0.67 adds x6, x7, x8, lsl #2
+# EM1-NEXT: 1 1 0.33 adds x6, x7, x8, lsl #2
# EM1-NEXT: 1 2 0.67 subs w9, w10, w11, asr #3
-# EM3-NEXT: 1 2 0.50 add w0, w1, w2
+# EM3-NEXT: 1 1 0.25 add w0, w1, w2
# EM3-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
-# EM3-NEXT: 1 2 0.50 adds x6, x7, x8, lsl #2
+# EM3-NEXT: 1 1 0.25 adds x6, x7, x8, lsl #2
# EM3-NEXT: 1 2 0.50 subs w9, w10, w11, asr #3
More information about the llvm-commits
mailing list