[llvm-commits] [llvm] r115332 - /llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
Evan Cheng
evan.cheng at apple.com
Fri Oct 1 14:41:19 PDT 2010
This is wrong. I'll fix it.
Evan
On Oct 1, 2010, at 12:41 PM, Evan Cheng wrote:
> Author: evancheng
> Date: Fri Oct 1 14:41:46 2010
> New Revision: 115332
>
> URL: http://llvm.org/viewvc/llvm-project?rev=115332&view=rev
> Log:
> Per the Cortex-A9 pipeline diagram, the AGU (core load / store issue) and NEON/FP issue are multiplexed. Model this correctly.
>
> Modified:
> llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
>
> Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=115332&r1=115331&r2=115332&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Fri Oct 1 14:41:46 2010
> @@ -18,10 +18,11 @@
> // Functional units
> def A9_Pipe0 : FuncUnit; // pipeline 0
> def A9_Pipe1 : FuncUnit; // pipeline 1
> -def A9_AGU : FuncUnit; // LS pipe
> -def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
> +def A9_AGU : FuncUnit; // Address generation unit for ld / st
> +def A9_NPipe : FuncUnit; // NEON ALU/MUL pipeline
> def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
> def A9_DRegsN : FuncUnit; // FP register set, NEON side
> +def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
>
> // Bypasses
> def A9_LdBypass : Bypass;
> @@ -29,7 +30,7 @@
> // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
> //
> def CortexA9Itineraries : ProcessorItineraries<
> - [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_AGU, A9_Pipe0, A9_Pipe1],
> + [A9_Pipe0, A9_Pipe1, A9_AGU, A9_NPipe, A9_DRegsVFP, A9_DRegsN, A9_MUX0],
> [A9_LdBypass], [
> // Two fully-pipelined integer ALU pipelines
>
> @@ -130,77 +131,77 @@
> //
> // Immediate offset
> InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>],
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> [3, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [4, 1], [A9_LdBypass]>,
> // FIXME: If address is 64-bit aligned, AGU cycles is 1.
> InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [3, 3, 1], [A9_LdBypass]>,
> //
> // Register offset
> InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>],
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> [3, 1, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [4, 1, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [3, 3, 1, 1], [A9_LdBypass]>,
> //
> // Scaled register offset
> InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>],
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> [4, 1, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [5, 1, 1], [A9_LdBypass]>,
> //
> // Immediate offset with update
> InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>],
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> [3, 2, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [4, 3, 1], [A9_LdBypass]>,
> //
> // Register offset with update
> InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>],
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> [3, 2, 1, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [4, 3, 1, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [3, 3, 1, 1], [A9_LdBypass]>,
> //
> // Scaled register offset with update
> InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>],
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> [4, 3, 1, 1], [A9_LdBypass]>,
> InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> - [5, 4, 1, 1], [A9_LdBypass]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> + [5, 4, 1, 1], [A9_LdBypass]>,
> //
> // Load multiple
> InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>],
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> [3], [A9_LdBypass]>,
>
> //
> // Load multiple plus branch
> InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>,
> InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
>
> //
> // iLoadi + iALUr for t2LDRpci_pic.
> InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>,
> InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
> [2, 1]>,
>
> @@ -208,50 +209,55 @@
> ///
> // Immediate offset
> InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>], [1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1]>,
> InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
> // FIXME: If address is 64-bit aligned, AGU cycles is 1.
> InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
> //
> // Register offset
> - InstrItinData<IIC_iStore_r , [InstrStage<1, [ A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
> - InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [ A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
> - InstrItinData<IIC_iStore_d_r, [InstrStage<1, [ A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
> + InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Pipe1]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
> + InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Pipe1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
> + InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Pipe1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
> //
> // Scaled register offset
> InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
> InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
> //
> // Immediate offset with update
> InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>], [2, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>], [2, 1, 1]>,
> InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [3, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>], [3, 1, 1]>,
> //
> // Register offset with update
> InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>], [2, 1, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> + [2, 1, 1, 1]>,
> InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> + [3, 1, 1, 1]>,
> InstrItinData<IIC_iStore_d_ru,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> + [3, 1, 1, 1]>,
> //
> // Scaled register offset with update
> InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>], [2, 1, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>],
> + [2, 1, 1, 1]>,
> InstrItinData<IIC_iStore_bh_siu,[InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_AGU]>],
> + [3, 1, 1, 1]>,
> //
> // Store multiple
> InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>]>,
> + InstrStage<1, [A9_MUX0, A9_AGU]>]>,
> // Branch
> //
> // no delay slots, so the latency of a branch is unimportant
> @@ -278,21 +284,23 @@
> InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // Single-precision FP Unary
> InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> // Extra latency cycles since wbck is 2 cycles
> InstrStage<3, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1]>,
> //
> // Double-precision FP Unary
> InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> // Extra latency cycles since wbck is 2 cycles
> InstrStage<3, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1]>,
>
> //
> // Single-precision FP Compare
> @@ -300,124 +308,144 @@
> // Extra latency cycles since wbck is 4 cycles
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1]>,
> //
> // Double-precision FP Compare
> InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> // Extra latency cycles since wbck is 4 cycles
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1]>,
> //
> // Single to Double FP Convert
> InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Double to Single FP Convert
> InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
>
> //
> // Single to Half FP Convert
> InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Half to Single FP Convert
> InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<3, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 1]>,
>
> //
> // Single-Precision FP to Integer Convert
> InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Double-Precision FP to Integer Convert
> InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Integer to Single-Precision FP Convert
> InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Integer to Double-Precision FP Convert
> InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Single-precision FP ALU
> InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1, 1]>,
> //
> // Double-precision FP ALU
> InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<5, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1, 1]>,
> //
> // Single-precision FP Multiply
> InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<6, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [5, 1, 1]>,
> //
> // Double-precision FP Multiply
> InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<7, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [6, 1, 1]>,
> //
> // Single-precision FP MAC
> InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<9, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [8, 0, 1, 1]>,
> //
> // Double-precision FP MAC
> InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<10, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [9, 0, 1, 1]>,
> //
> // Single-precision FP DIV
> InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<16, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
> + InstrStage<10, [A9_MUX0, A9_NPipe]>],
> + [15, 1, 1]>,
> //
> // Double-precision FP DIV
> InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<26, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
> + InstrStage<20, [A9_MUX0, A9_NPipe]>],
> + [25, 1, 1]>,
> //
> // Single-precision FP SQRT
> InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<18, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<13, [A9_NPipe]>], [17, 1]>,
> + InstrStage<13, [A9_MUX0, A9_NPipe]>],
> + [17, 1]>,
> //
> // Double-precision FP SQRT
> InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<33, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<28, [A9_NPipe]>], [32, 1]>,
> + InstrStage<28, [A9_MUX0, A9_NPipe]>],
> + [32, 1]>,
>
> //
> // Integer to Single-precision Move
> @@ -425,79 +453,74 @@
> // Extra 1 latency cycle since wbck is 2 cycles
> InstrStage<3, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1]>,
> //
> // Integer to Double-precision Move
> InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> // Extra 1 latency cycle since wbck is 2 cycles
> InstrStage<3, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1, 1]>,
> //
> // Single-precision to Integer Move
> InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1]>,
> //
> // Double-precision to Integer Move
> InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [1, 1, 1]>,
> //
> // Single-precision FP Load
> InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // Double-precision FP Load
> InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // FP Load Multiple
> InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // Single-precision FP Store
> InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // Double-precision FP Store
> InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // FP Store Multiple
> InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
> InstrStage<2, [A9_DRegsN], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> // NEON
> // Issue through integer pipeline, and execute in NEON unit.
> - // FIXME: Neon pipeline and LdSt unit are multiplexed.
> - // Add some syntactic sugar to model this!
> // VLD1
> // FIXME: We don't model this instruction properly
> InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // VLD2
> // FIXME: We don't model this instruction properly
> @@ -505,8 +528,8 @@
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 2, 1]>,
> //
> // VLD3
> // FIXME: We don't model this instruction properly
> @@ -514,8 +537,8 @@
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 2, 2, 1]>,
> //
> // VLD4
> // FIXME: We don't model this instruction properly
> @@ -523,8 +546,8 @@
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 2, 2, 2, 1]>,
> //
> // VST
> // FIXME: We don't model this instruction properly
> @@ -532,120 +555,135 @@
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1], 0>,
> - InstrStage<1, [A9_AGU]>,
> - InstrStage<1, [A9_NPipe]>]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
> //
> // Double-register Integer Unary
> InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 2]>,
> //
> // Quad-register Integer Unary
> InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 2]>,
> //
> // Double-register Integer Q-Unary
> InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Quad-register Integer CountQ-Unary
> InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1]>,
> //
> // Double-register Integer Binary
> InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3, 2, 2]>,
> //
> // Quad-register Integer Binary
> InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3, 2, 2]>,
> //
> // Double-register Integer Subtract
> InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3, 2, 1]>,
> //
> // Quad-register Integer Subtract
> InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3, 2, 1]>,
> //
> // Double-register Integer Shift
> InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3, 1, 1]>,
> //
> // Quad-register Integer Shift
> InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3, 1, 1]>,
> //
> // Double-register Integer Shift (4 cycle)
> InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1, 1]>,
> //
> // Quad-register Integer Shift (4 cycle)
> InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 1, 1]>,
> //
> // Double-register Integer Binary (4 cycle)
> InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 2, 2]>,
> //
> // Quad-register Integer Binary (4 cycle)
> InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 2, 2]>,
> //
> // Double-register Integer Subtract (4 cycle)
> InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 2, 1]>,
> //
> // Quad-register Integer Subtract (4 cycle)
> InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [4, 2, 1]>,
>
> //
> // Double-register Integer Count
> @@ -653,7 +691,8 @@
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3, 2, 2]>,
> //
> // Quad-register Integer Count
> // Result written in N3, but that is relative to the last cycle of multicycle,
> @@ -662,35 +701,40 @@
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [4, 2, 2]>,
> //
> // Double-register Absolute Difference and Accumulate
> InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [6, 3, 2, 1]>,
> //
> // Quad-register Absolute Difference and Accumulate
> InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [6, 3, 2, 1]>,
> //
> // Double-register Integer Pair Add Long
> InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [6, 3, 1]>,
> //
> // Quad-register Integer Pair Add Long
> InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [6, 3, 1]>,
>
> //
> // Double-register Integer Multiply (.8, .16)
> @@ -698,14 +742,16 @@
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [6, 2, 2]>,
> //
> // Quad-register Integer Multiply (.8, .16)
> InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [7, 2, 2]>,
>
> //
> // Double-register Integer Multiply (.32)
> @@ -713,56 +759,64 @@
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [7, 2, 1]>,
> //
> // Quad-register Integer Multiply (.32)
> InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 9 cycles
> InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
> + InstrStage<4, [A9_MUX0, A9_NPipe]>],
> + [9, 2, 1]>,
> //
> // Double-register Integer Multiply-Accumulate (.8, .16)
> InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [6, 3, 2, 2]>,
> //
> // Double-register Integer Multiply-Accumulate (.32)
> InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [7, 3, 2, 1]>,
> //
> // Quad-register Integer Multiply-Accumulate (.8, .16)
> InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [7, 3, 2, 2]>,
> //
> // Quad-register Integer Multiply-Accumulate (.32)
> InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 9 cycles
> InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
> + InstrStage<4, [A9_MUX0, A9_NPipe]>],
> + [9, 3, 2, 1]>,
> //
> // Move Immediate
> InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [3]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [3]>,
> //
> // Double-register Permute Move
> InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // FIXME: all latencies are arbitrary, no information is available
> InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_AGU]>], [2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 1]>,
> //
> // Quad-register Permute Move
> // Result written in N2, but that is relative to the last cycle of multicycle,
> @@ -771,42 +825,48 @@
> // FIXME: all latencies are arbitrary, no information is available
> InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 1]>,
> //
> // Integer to Single-precision Move
> InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
> // FIXME: all latencies are arbitrary, no information is available
> InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 1]>,
> //
> // Integer to Double-precision Move
> InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
> // FIXME: all latencies are arbitrary, no information is available
> InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 1, 1]>,
> //
> // Single-precision to Integer Move
> InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
> // FIXME: all latencies are arbitrary, no information is available
> InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 1]>,
> //
> // Double-precision to Integer Move
> InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
> // FIXME: all latencies are arbitrary, no information is available
> InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 2, 1]>,
> //
> // Integer to Lane Move
> InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
> // FIXME: all latencies are arbitrary, no information is available
> InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 1, 1]>,
>
> //
> // Double-register FP Unary
> @@ -814,7 +874,8 @@
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [5, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [5, 2]>,
> //
> // Quad-register FP Unary
> // Result written in N5, but that is relative to the last cycle of multicycle,
> @@ -823,7 +884,8 @@
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [6, 2]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [6, 2]>,
> //
> // Double-register FP Binary
> // FIXME: We're using this itin for many instructions and [2, 2] here is too
> @@ -832,7 +894,8 @@
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [5, 2, 2]>,
> //
> // Quad-register FP Binary
> // Result written in N5, but that is relative to the last cycle of multicycle,
> @@ -843,14 +906,16 @@
> // Extra latency cycles since wbck is 8 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [6, 2, 2]>,
> //
> // Double-register FP Multiple-Accumulate
> InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [6, 3, 2, 1]>,
> //
> // Quad-register FP Multiple-Accumulate
> // Result written in N9, but that is relative to the last cycle of multicycle,
> @@ -859,28 +924,32 @@
> // Extra latency cycles since wbck is 9 cycles
> InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
> + InstrStage<4, [A9_MUX0, A9_NPipe]>],
> + [8, 4, 2, 1]>,
> //
> // Double-register Reciprical Step
> InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [6, 2, 2]>,
> //
> // Quad-register Reciprical Step
> InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 9 cycles
> InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
> + InstrStage<4, [A9_MUX0, A9_NPipe]>],
> + [8, 2, 2]>,
> //
> // Double-register Permute
> InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 6 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 2, 1, 1]>,
> //
> // Quad-register Permute
> // Result written in N2, but that is relative to the last cycle of multicycle,
> @@ -889,7 +958,8 @@
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 3, 1, 1]>,
> //
> // Quad-register Permute (3 cycle issue)
> // Result written in N2, but that is relative to the last cycle of multicycle,
> @@ -898,7 +968,8 @@
> // Extra latency cycles since wbck is 8 cycles
> InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<3, [A9_NPipe]>], [4, 4, 1, 1]>,
> + InstrStage<3, [A9_MUX0, A9_NPipe]>],
> + [4, 4, 1, 1]>,
>
> //
> // Double-register VEXT
> @@ -906,56 +977,66 @@
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
> + InstrStage<1, [A9_MUX0, A9_NPipe]>],
> + [2, 1, 1]>,
> //
> // Quad-register VEXT
> InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 9 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 1, 1]>,
> //
> // VTB
> InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 2, 1]>,
> InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 2, 2, 1]>,
> InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 8 cycles
> InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
> + InstrStage<3, [A9_MUX0, A9_NPipe]>],
> + [4, 2, 2, 3, 1]>,
> InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 8 cycles
> InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
> + InstrStage<3, [A9_MUX0, A9_NPipe]>],
> + [4, 2, 2, 3, 3, 1]>,
> //
> // VTBX
> InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 1, 2, 1]>,
> InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 7 cycles
> InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [3, 1, 2, 2, 1]>,
> InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 8 cycles
> InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
> + InstrStage<3, [A9_MUX0, A9_NPipe]>],
> + [4, 1, 2, 2, 3, 1]>,
> InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
> // Extra latency cycles since wbck is 8 cycles
> InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
> InstrStage<1, [A9_Pipe1]>,
> - InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
> + InstrStage<2, [A9_MUX0, A9_NPipe]>],
> + [4, 1, 2, 2, 3, 3, 1]>
> ]>;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list