[llvm-commits] [llvm] r115365 - /llvm/trunk/lib/Target/ARM/ARMScheduleA9.td

Evan Cheng evan.cheng at apple.com
Fri Oct 1 15:52:29 PDT 2010


Author: evancheng
Date: Fri Oct  1 17:52:29 2010
New Revision: 115365

URL: http://llvm.org/viewvc/llvm-project?rev=115365&view=rev
Log:
Fix r115332: correctly model AGU / NEON mux.

Modified:
    llvm/trunk/lib/Target/ARM/ARMScheduleA9.td

Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=115365&r1=115364&r2=115365&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Fri Oct  1 17:52:29 2010
@@ -131,77 +131,95 @@
   //
   // Immediate offset
   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [3, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [4, 1], [A9_LdBypass]>,
   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [3, 3, 1], [A9_LdBypass]>,
   //
   // Register offset
   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [3, 1, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [4, 1, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [3, 3, 1, 1], [A9_LdBypass]>,
   //
   // Scaled register offset
   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [4, 1, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [5, 1, 1], [A9_LdBypass]>,
   //
   // Immediate offset with update
   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [3, 2, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [4, 3, 1], [A9_LdBypass]>,
   //
   // Register offset with update
   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [3, 2, 1, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [4, 3, 1, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [3, 3, 1, 1], [A9_LdBypass]>,
   //
   // Scaled register offset with update
   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [4, 3, 1, 1], [A9_LdBypass]>,
   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Pipe1]>,
-                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU]>],
                                  [5, 4, 1, 1], [A9_LdBypass]>,
   //
   // Load multiple
   InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<2, [A9_AGU]>],
                                [3], [A9_LdBypass]>,
 
   //
   // Load multiple plus branch
   InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_MUX0, A9_AGU]>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU]>,
                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
 
   //
   // iLoadi + iALUr for t2LDRpci_pic.
   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_MUX0, A9_AGU]>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU]>,
                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
                                [2, 1]>,
 
@@ -209,55 +227,71 @@
   ///
   // Immediate offset
   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>], [1, 1]>,
   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>], [1, 1]>,
   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>], [1, 1]>,
   //
   // Register offset
   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
   //
   // Scaled register offset
   InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>], [1, 1, 1]>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
   //
   // Immediate offset with update
   InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>], [2, 1, 1]>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>], [2, 1, 1]>,
   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Pipe1]>,
-                                  InstrStage<2, [A9_MUX0, A9_AGU]>], [3, 1, 1]>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU]>], [3, 1, 1]>,
   //
   // Register offset with update
   InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [2, 1, 1, 1]>,
   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Pipe1]>,
-                                  InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU]>],
                                  [3, 1, 1, 1]>,
   InstrItinData<IIC_iStore_d_ru,[InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU]>],
                                 [3, 1, 1, 1]>,
   //
   // Scaled register offset with update
   InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Pipe1]>,
-                                 InstrStage<1, [A9_MUX0, A9_AGU]>],
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU]>],
                                 [2, 1, 1, 1]>,
   InstrItinData<IIC_iStore_bh_siu,[InstrStage<1, [A9_Pipe1]>,
-                                   InstrStage<2, [A9_MUX0, A9_AGU]>],
+                                   InstrStage<1, [A9_MUX0], 0>,
+                                   InstrStage<2, [A9_AGU]>],
                                   [3, 1, 1, 1]>,
   //
   // Store multiple
   InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_MUX0, A9_AGU]>]>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU]>]>,
   // Branch
   //
   // no delay slots, so the latency of a branch is unimportant
@@ -284,14 +318,16 @@
   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_NPipe]>]>,
   //
   // Single-precision FP Unary
   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // Double-precision FP Unary
@@ -299,7 +335,8 @@
                                // Extra latency cycles since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
 
   //
@@ -308,7 +345,8 @@
                                // Extra latency cycles since wbck is 4 cycles
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // Double-precision FP Compare
@@ -316,21 +354,24 @@
                                // Extra latency cycles since wbck is 4 cycles
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // Single to Double FP Convert
   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Double to Single FP Convert
   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
 
   //
@@ -338,14 +379,16 @@
   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Half to Single FP Convert
   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1]>,
 
   //
@@ -353,98 +396,112 @@
   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Single-precision FP ALU
   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1, 1]>,
   //
   // Double-precision FP ALU
   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1, 1]>,
   //
   // Single-precision FP Multiply
   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [5, 1, 1]>,
   //
   // Double-precision FP Multiply
   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [6, 1, 1]>,
   //
   // Single-precision FP MAC
   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [8, 0, 1, 1]>,
   //
   // Double-precision FP MAC
   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
                                InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<2,  [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<2,  [A9_NPipe]>],
                               [9, 0, 1, 1]>,
   //
   // Single-precision FP DIV
   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
                                InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<10, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<10, [A9_NPipe]>],
                               [15, 1, 1]>,
   //
   // Double-precision FP DIV
   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
                                InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<20, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<20, [A9_NPipe]>],
                               [25, 1, 1]>,
   //
   // Single-precision FP SQRT
   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<13, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<13, [A9_NPipe]>],
                               [17, 1]>,
   //
   // Double-precision FP SQRT
   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<28, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<28, [A9_NPipe]>],
                               [32, 1]>,
 
   //
@@ -453,7 +510,8 @@
                                // Extra 1 latency cycle since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // Integer to Double-precision Move
@@ -461,28 +519,32 @@
                                // Extra 1 latency cycle since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1, 1]>,
   //
   // Single-precision to Integer Move
   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // Double-precision to Integer Move
   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1, 1]>,
   //
   // Single-precision FP Load
   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // Double-precision FP Load
@@ -490,34 +552,39 @@
   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1]>,
   //
   // FP Load Multiple
   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // Double-precision FP Store
   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1, 1]>,
   //
   // FP Store Multiple
   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>]>,
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
   // VLD1
@@ -525,7 +592,8 @@
   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>]>,
   //
   // VLD2
   // FIXME: We don't model this instruction properly
@@ -533,7 +601,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 2, 1]>,
   //
   // VLD3
@@ -542,7 +611,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 2, 2, 1]>,
   //
   // VLD4
@@ -551,7 +621,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 2, 2, 2, 1]>,
   //
   // VST
@@ -560,14 +631,16 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>]>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>]>,
   //
   // Double-register Integer Unary
   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 2]>,
   //
   // Quad-register Integer Unary
@@ -575,7 +648,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 2]>,
   //
   // Double-register Integer Q-Unary
@@ -583,7 +657,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Quad-register Integer CountQ-Unary
@@ -591,7 +666,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1]>,
   //
   // Double-register Integer Binary
@@ -599,7 +675,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 2, 2]>,
   //
   // Quad-register Integer Binary
@@ -607,7 +684,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 2, 2]>,
   //
   // Double-register Integer Subtract
@@ -615,7 +693,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 2, 1]>,
   //
   // Quad-register Integer Subtract
@@ -623,7 +702,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 2, 1]>,
   //
   // Double-register Integer Shift
@@ -631,7 +711,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 1, 1]>,
   //
   // Quad-register Integer Shift
@@ -639,7 +720,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 1, 1]>,
   //
   // Double-register Integer Shift (4 cycle)
@@ -647,7 +729,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1, 1]>,
   //
   // Quad-register Integer Shift (4 cycle)
@@ -655,7 +738,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 1, 1]>,
   //
   // Double-register Integer Binary (4 cycle)
@@ -663,7 +747,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 2, 2]>,
   //
   // Quad-register Integer Binary (4 cycle)
@@ -671,7 +756,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 2, 2]>,
   //
   // Double-register Integer Subtract (4 cycle)
@@ -679,7 +765,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 2, 1]>,
   //
   // Quad-register Integer Subtract (4 cycle)
@@ -687,7 +774,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [4, 2, 1]>,
 
   //
@@ -696,7 +784,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 2, 2]>,
   //
   // Quad-register Integer Count
@@ -706,7 +795,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [4, 2, 2]>,
   //
   // Double-register Absolute Difference and Accumulate
@@ -714,7 +804,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [6, 3, 2, 1]>,
   //
   // Quad-register Absolute Difference and Accumulate
@@ -722,7 +813,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [6, 3, 2, 1]>,
   //
   // Double-register Integer Pair Add Long
@@ -730,7 +822,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [6, 3, 1]>,
   //
   // Quad-register Integer Pair Add Long
@@ -738,7 +831,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [6, 3, 1]>,
 
   //
@@ -747,7 +841,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [6, 2, 2]>,
   //
   // Quad-register Integer Multiply (.8, .16)
@@ -755,7 +850,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [7, 2, 2]>,
 
   //
@@ -764,7 +860,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [7, 2, 1]>,
   //
   // Quad-register Integer Multiply (.32)
@@ -772,7 +869,8 @@
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<4, [A9_NPipe]>],
                               [9, 2, 1]>,
   //
   // Double-register Integer Multiply-Accumulate (.8, .16)
@@ -780,7 +878,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [6, 3, 2, 2]>,
   //
   // Double-register Integer Multiply-Accumulate (.32)
@@ -788,7 +887,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [7, 3, 2, 1]>,
   //
   // Quad-register Integer Multiply-Accumulate (.8, .16)
@@ -796,7 +896,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [7, 3, 2, 2]>,
   //
   // Quad-register Integer Multiply-Accumulate (.32)
@@ -804,7 +905,8 @@
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<4, [A9_NPipe]>],
                               [9, 3, 2, 1]>,
 
   //
@@ -812,7 +914,8 @@
   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [1,1]>,
   //
   // Move Immediate
@@ -820,7 +923,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3]>,
   //
   // Double-register Permute Move
@@ -828,7 +932,8 @@
   // FIXME: all latencies are arbitrary, no information is available
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1]>,
   //
   // Quad-register Permute Move
@@ -836,7 +941,8 @@
   // FIXME: all latencies are arbitrary, no information is available
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1]>,
   //
   // Integer to Single-precision Move
@@ -844,7 +950,8 @@
   // FIXME: all latencies are arbitrary, no information is available
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1]>,
   //
   // Integer to Double-precision Move
@@ -852,7 +959,8 @@
   // FIXME: all latencies are arbitrary, no information is available
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1, 1]>,
   //
   // Single-precision to Integer Move
@@ -860,7 +968,8 @@
   // FIXME: all latencies are arbitrary, no information is available
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1]>,
   //
   // Double-precision to Integer Move
@@ -868,7 +977,8 @@
   // FIXME: all latencies are arbitrary, no information is available
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 2, 1]>,
   //
   // Integer to Lane Move
@@ -876,7 +986,8 @@
   // FIXME: all latencies are arbitrary, no information is available
                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [3, 1, 1]>,
 
   //
@@ -885,7 +996,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [3, 1]>,
   //
   // Double-register FP Unary
@@ -893,7 +1005,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [5, 2]>,
   //
   // Quad-register FP Unary
@@ -903,7 +1016,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [6, 2]>,
   //
   // Double-register FP Binary
@@ -913,7 +1027,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [5, 2, 2]>,
   //
   // Quad-register FP Binary
@@ -925,7 +1040,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [6, 2, 2]>,
   //
   // Double-register FP Multiply-Accumulate
@@ -933,7 +1049,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [6, 3, 2, 1]>,
   //
   // Quad-register FP Multiply-Accumulate
@@ -943,7 +1060,8 @@
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<4, [A9_NPipe]>],
                               [8, 4, 2, 1]>,
   //
   // Double-register Reciprocal Step
@@ -951,7 +1069,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [6, 2, 2]>,
   //
   // Quad-register Reciprocal Step
@@ -959,7 +1078,8 @@
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<4, [A9_NPipe]>],
                               [8, 2, 2]>,
   //
   // Double-register Permute
@@ -967,7 +1087,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 2, 1, 1]>,
   //
   // Quad-register Permute
@@ -977,7 +1098,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [3, 3, 1, 1]>,
   //
   // Quad-register Permute (3 cycle issue)
@@ -987,7 +1109,8 @@
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<3, [A9_NPipe]>],
                               [4, 4, 1, 1]>,
 
   //
@@ -996,7 +1119,8 @@
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_NPipe]>],
                               [2, 1, 1]>,
   //
   // Quad-register VEXT
@@ -1004,7 +1128,8 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [3, 1, 1]>,
   //
   // VTB
@@ -1012,25 +1137,29 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [3, 2, 1]>,
   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [3, 2, 2, 1]>,
   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<3, [A9_NPipe]>],
                               [4, 2, 2, 3, 1]>,
   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<3, [A9_NPipe]>],
                               [4, 2, 2, 3, 3, 1]>,
   //
   // VTBX
@@ -1038,24 +1167,28 @@
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [3, 1, 2, 1]>,
   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [3, 1, 2, 2, 1]>,
   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<3, [A9_NPipe]>],
                               [4, 1, 2, 2, 3, 1]>,
   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_NPipe]>],
                               [4, 1, 2, 2, 3, 3, 1]>
 ]>;





More information about the llvm-commits mailing list