[llvm] r202767 - [AArch64] This is a work in progress to provide a machine description

Mon Mar 3 15:32:48 PST 2014

Author: mcrosier
Date: Mon Mar  3 17:32:47 2014
New Revision: 202767

URL: http://llvm.org/viewvc/llvm-project?rev=202767&view=rev
Log:
[AArch64] This is a work in progress to provide a machine description
for the Cortex-A53 subtarget in the AArch64 backend.

This patch lays the groundwork to annotate each AArch64 instruction
(no NEON yet) with a list of SchedReadWrite types. The patch also
provides the Cortex-A53 processor resources, maps those to the default
SchedReadWrites, and provides basic latency. NEON support will be added
in a subsequent patch with proper forwarding logic.

Verification was done by setting the pre-RA scheduler to linearize to
better gauge the effect of the MIScheduler. Even without modeling the
forwarding logic, the results show a modest improvement for Cortex-A53.

Reviewers: apazos, mcrosier, atrick
Patch by Dave Estes <cestes at codeaurora.org>!

Added:
    llvm/trunk/lib/Target/AArch64/AArch64ScheduleA53.td
    llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64.td
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/trunk/lib/Target/AArch64/AArch64Schedule.td
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h

Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=202767&r1=202766&r2=202767&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Mon Mar  3 17:32:47 2014
@@ -41,13 +41,20 @@ class ProcNoItin<string Name, list<Subta
 
 def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>;
 
-def : ProcNoItin<"cortex-a53",      [FeatureFPARMv8,
-                                    FeatureNEON,
-                                    FeatureCrypto]>;
+def ProcA53     : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+                                   "Cortex-A53 ARM processors",
+                                   [FeatureFPARMv8,
+                                   FeatureNEON,
+                                   FeatureCrypto]>;
 
-def : ProcNoItin<"cortex-a57",      [FeatureFPARMv8,
-                                    FeatureNEON,
-                                    FeatureCrypto]>;
+def ProcA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+                                   "Cortex-A57 ARM processors",
+                                   [FeatureFPARMv8,
+                                   FeatureNEON,
+                                   FeatureCrypto]>;
+
+def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
+def : Processor<"cortex-a57", NoItineraries, [ProcA57]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=202767&r1=202766&r2=202767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Mon Mar  3 17:32:47 2014
@@ -350,33 +350,39 @@ multiclass addsub_exts<bit sf, bit op, b
                     outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
                     !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                     [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
-                    NoItinerary>;
+                    NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
     def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
                     outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
                     !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                     [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
-                    NoItinerary>;
+                    NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
     def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
                     outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
                     !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                     [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
-                    NoItinerary>;
+                    NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
 
     def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
                     outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
                     !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                     [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
-                    NoItinerary>;
+                    NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
     def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
                     outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
                     !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                     [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
-                    NoItinerary>;
+                    NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
     def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
                     outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
                     !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                     [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
-                    NoItinerary>;
+                    NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
 }
 
 // These two could be merge in with the above, but their patterns aren't really
@@ -388,30 +394,32 @@ multiclass addsub_xxtx<bit op, bit S, st
                    (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                    [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
-                   NoItinerary>;
+                   NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
 
     def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
                    outs,
                    (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
                    !strconcat(asmop, "$Rn, $Rm, $Imm3"),
                    [/* No Pattern: same as uxtx */],
-                   NoItinerary>;
+                   NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
 }
 
 multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
     def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
-                              outs,
-                              (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
-                              !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                              [/* No pattern: probably same as uxtw */],
-                              NoItinerary>;
+                   outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
+                   !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                   [/* No pattern: probably same as uxtw */],
+                   NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
 
     def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
-                              outs,
-                              (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
-                              !strconcat(asmop, "$Rn, $Rm, $Imm3"),
-                              [/* No Pattern: probably same as uxtw */],
-                              NoItinerary>;
+                   outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
+                   !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+                   [/* No Pattern: probably same as uxtw */],
+                   NoItinerary>,
+                 Sched<[WriteALU, ReadALU, ReadALU]>;
 }
 
 class SetRD<RegisterClass RC, SDPatternOperator op>
@@ -657,7 +665,8 @@ multiclass addsubimm_varieties<string pr
                          (ins GPRsp:$Rn, imm_operand:$Imm12),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
                          [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
-                         NoItinerary>;
+                         NoItinerary>,
+           Sched<[WriteALU, ReadALU]>;
 
 
   // S variants can read SP but would write to ZR
@@ -666,7 +675,8 @@ multiclass addsubimm_varieties<string pr
                          (ins GPRsp:$Rn, imm_operand:$Imm12),
                          !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
                          [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
-                         NoItinerary> {
+                         NoItinerary>,
+           Sched<[WriteALU, ReadALU]> {
     let Defs = [NZCV];
   }
 
@@ -678,7 +688,8 @@ multiclass addsubimm_varieties<string pr
                             !strconcat(cmpasmop, " $Rn, $Imm12"),
                             [(set NZCV,
                                   (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
-                            NoItinerary> {
+                            NoItinerary>,
+           Sched<[WriteALU, ReadALU]> {
     let Rd = 0b11111;
     let Defs = [NZCV];
     let isCompare = 1;
@@ -740,7 +751,7 @@ defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDww
 // Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
 
 //===-------------------------------
-// 1. The "shifed register" operands. Shared with logical insts.
+// 1. The "shifted register" operands. Shared with logical insts.
 //===-------------------------------
 
 multiclass shift_operands<string prefix, string form> {
@@ -800,7 +811,8 @@ multiclass addsub_shifts<string prefix,
                        [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
                             !cast<Operand>("lsl_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU]>;
 
   def _lsr : A64I_addsubshift<sf, op, s, 0b01,
                        (outs GPR:$Rd),
@@ -810,7 +822,8 @@ multiclass addsub_shifts<string prefix,
                        [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
                             !cast<Operand>("lsr_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU]>;
 
   def _asr : A64I_addsubshift<sf, op, s, 0b10,
                        (outs GPR:$Rd),
@@ -820,7 +833,8 @@ multiclass addsub_shifts<string prefix,
                        [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
                             !cast<Operand>("asr_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU]>;
   }
 
   def _noshift
@@ -906,7 +920,8 @@ multiclass cmp_shifts<string prefix, bit
                        [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
                             !cast<Operand>("lsl_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteCMP, ReadCMP, ReadCMP]>;
 
   def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
                        (outs),
@@ -916,7 +931,8 @@ multiclass cmp_shifts<string prefix, bit
                        [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
                             !cast<Operand>("lsr_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteCMP, ReadCMP, ReadCMP]>;
 
   def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
                        (outs),
@@ -926,7 +942,8 @@ multiclass cmp_shifts<string prefix, bit
                        [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
                             !cast<Operand>("asr_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteCMP, ReadCMP, ReadCMP]>;
   }
 
   def _noshift
@@ -953,12 +970,14 @@ multiclass A64I_addsubcarrySizes<bit op,
     def www : A64I_addsubcarry<0b0, op, s, 0b000000,
                                (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
                                !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
-                               [], NoItinerary>;
+                               [], NoItinerary>,
+              Sched<[WriteALU, ReadALU, ReadALU]>;
 
     def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
                                (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
                                !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
-                               [], NoItinerary>;
+                               [], NoItinerary>,
+              Sched<[WriteALU, ReadALU, ReadALU]>;
   }
 }
 
@@ -1044,14 +1063,16 @@ multiclass A64I_bitfieldSizes<bits<2> op
   def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
                     (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
                     !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                    [], NoItinerary> {
+                    [], NoItinerary>,
+             Sched<[WriteALU, ReadALU]> {
     let DecoderMethod = "DecodeBitfieldInstruction";
   }
 
   def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
                     (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
                     !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                    [], NoItinerary> {
+                    [], NoItinerary>,
+             Sched<[WriteALU, ReadALU]> {
     let DecoderMethod = "DecodeBitfieldInstruction";
   }
 }
@@ -1064,7 +1085,8 @@ defm UBFM : A64I_bitfieldSizes<0b10, "ub
 def BFMwwii :
   A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
         (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
-        "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+        "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
+  Sched<[WriteALU, ReadALU]> {
   let DecoderMethod = "DecodeBitfieldInstruction";
   let Constraints = "$src = $Rd";
 }
@@ -1072,7 +1094,8 @@ def BFMwwii :
 def BFMxxii :
   A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
         (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
-        "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+        "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
+  Sched<[WriteALU, ReadALU]> {
   let DecoderMethod = "DecodeBitfieldInstruction";
   let Constraints = "$src = $Rd";
 }
@@ -1094,7 +1117,8 @@ class A64I_bf_ext<bit sf, bits<2> opc, R
   : A64I_bitfield<sf, opc, sf,
                   (outs GPRDest:$Rd), (ins GPR32:$Rn),
                   !strconcat(asmop, "\t$Rd, $Rn"),
-                  [(set dty:$Rd, pattern)], NoItinerary> {
+                  [(set dty:$Rd, pattern)], NoItinerary>,
+    Sched<[WriteALU, ReadALU]> {
   let ImmR = 0b000000;
   let ImmS = imms;
 }
@@ -1148,7 +1172,8 @@ multiclass A64I_shift<bits<2> opc, strin
                     (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
                     !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
                     [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
-                    NoItinerary> {
+                    NoItinerary>,
+            Sched<[WriteALU, ReadALU]> {
     let ImmS = 31;
   }
 
@@ -1156,7 +1181,8 @@ multiclass A64I_shift<bits<2> opc, strin
                     (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
                     !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
                     [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
-                    NoItinerary> {
+                    NoItinerary>,
+            Sched<[WriteALU, ReadALU]> {
     let ImmS = 63;
   }
 
@@ -1197,7 +1223,8 @@ class A64I_bitfield_lsl<bit sf, Register
   : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
                   "lsl\t$Rd, $Rn, $FullImm",
                   [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
-                  NoItinerary> {
+                  NoItinerary>,
+    Sched<[WriteALU, ReadALU]> {
   bits<12> FullImm;
   let ImmR = FullImm{5-0};
   let ImmS = FullImm{11-6};
@@ -1244,7 +1271,8 @@ multiclass A64I_bitfield_extract<bits<2>
                        (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
                        !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
                        [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
-                       NoItinerary> {
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU]> {
     // As above, no disassembler allowed.
     let isAsmParserOnly = 1;
   }
@@ -1253,7 +1281,8 @@ multiclass A64I_bitfield_extract<bits<2>
                        (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
                        !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
                        [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
-                       NoItinerary> {
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU]> {
     // As above, no disassembler allowed.
     let isAsmParserOnly = 1;
   }
@@ -1264,16 +1293,18 @@ defm UBFX :  A64I_bitfield_extract<0b10,
 
 // Again, variants based on BFM modify Rd so need it as an input too.
 def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
-           (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
-           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+                          (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+                          "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
+                Sched<[WriteALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
 }
 
 def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
-           (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
-           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+                          (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+                          "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
+                Sched<[WriteALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1353,7 +1384,8 @@ multiclass A64I_bitfield_insert<bits<2>
   def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
                            (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
                            !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                           [], NoItinerary> {
+                           [], NoItinerary>,
+             Sched<[WriteALU, ReadALU]> {
     // As above, no disassembler allowed.
     let isAsmParserOnly = 1;
   }
@@ -1361,7 +1393,8 @@ multiclass A64I_bitfield_insert<bits<2>
   def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
                            (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
                            !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                           [], NoItinerary> {
+                           [], NoItinerary>,
+             Sched<[WriteALU, ReadALU]> {
     // As above, no disassembler allowed.
     let isAsmParserOnly = 1;
   }
@@ -1373,7 +1406,8 @@ defm UBFIZ :  A64I_bitfield_insert<0b10,
 
 def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
                 (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
-                "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+                "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
+              Sched<[WriteALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1381,7 +1415,8 @@ def BFIwwii : A64I_bitfield<0b0, 0b01, 0
 
 def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
                 (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
-                "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+                "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
+              Sched<[WriteALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1418,14 +1453,16 @@ multiclass cmpbr_sizes<bit op, string as
                      (ins GPR64:$Rt, bcc_target:$Label),
                      !strconcat(asmop,"\t$Rt, $Label"),
                      [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
-                     NoItinerary>;
+                     NoItinerary>,
+          Sched<[WriteBr, ReadBr]>;
 
   def w : A64I_cmpbr<0b0, op,
                      (outs),
                      (ins GPR32:$Rt, bcc_target:$Label),
                      !strconcat(asmop,"\t$Rt, $Label"),
                      [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
-                     NoItinerary>;
+                     NoItinerary>,
+          Sched<[WriteBr, ReadBr]>;
   }
 }
 
@@ -1456,7 +1493,8 @@ def cond_code : Operand<i32>, ImmLeaf<i3
 def Bcc : A64I_condbr<0b0, 0b0, (outs),
                 (ins cond_code:$Cond, bcc_target:$Label),
                 "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
-                NoItinerary> {
+                NoItinerary>,
+          Sched<[WriteBr]> {
   let Uses = [NZCV];
   let isBranch = 1;
   let isTerminator = 1;
@@ -1502,7 +1540,8 @@ class A64I_condcmpimmImpl<bit sf, bit op
   : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
                 (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
                 !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
-                [], NoItinerary> {
+                [], NoItinerary>,
+    Sched<[WriteCMP, ReadCMP]> {
   let Defs = [NZCV];
 }
 
@@ -1568,7 +1607,8 @@ multiclass A64I_condselSizes<bit op, bit
                             (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
                             [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteCMP, ReadCMP]>;
 
 
     def xxxc : A64I_condsel<0b1, op, 0b0, op2,
@@ -1576,7 +1616,8 @@ multiclass A64I_condselSizes<bit op, bit
                             (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
                             [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteCMP, ReadCMP]>;
   }
 }
 
@@ -1686,7 +1727,8 @@ class A64I_dp_1src_impl<bit sf, bits<6>
                    (outs GPRrc:$Rd),
                    (ins GPRrc:$Rn),
                    patterns,
-                   itin>;
+                   itin>,
+      Sched<[WriteALU, ReadALU]>;
 
 multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
   let hasSideEffects = 0 in {
@@ -1742,7 +1784,8 @@ class dp_2src_impl<bit sf, bits<6> opcod
                    (outs GPRsp:$Rd),
                    (ins GPRsp:$Rn, GPRsp:$Rm),
                    patterns,
-                   itin>;
+                   itin>,
+	  Sched<[WriteALU, ReadALU, ReadALU]>;
 
 multiclass dp_2src_crc<bit c, string asmop> {
   def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
@@ -1793,13 +1836,17 @@ multiclass dp_2src <bits<6> opcode, stri
 defm CRC32  : dp_2src_crc<0b0, "crc32">;
 defm CRC32C : dp_2src_crc<0b1, "crc32c">;
 
-defm UDIV : dp_2src<0b000010, "udiv", udiv>;
-defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in {
+  defm UDIV : dp_2src<0b000010, "udiv", udiv>;
+  defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+}
 
-defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
-defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
-defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
-defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+let SchedRW = [WriteALUs, ReadALU, ReadALU] in {
+  defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
+  defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
+  defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
+  defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+}
 
 // Extra patterns for an incoming 64-bit value for a 32-bit
 // operation. Since the LLVM operations are undefined (as in C) if the
@@ -1832,7 +1879,8 @@ class A64I_dp3_4operand<bit sf, bits<6>
   : A64I_dp3<sf, opcode,
              (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
              !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
-             [(set AccTy:$Rd, pattern)], NoItinerary> {
+             [(set AccTy:$Rd, pattern)], NoItinerary>,
+    Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> {
   RegisterClass AccGPR = AccReg;
   RegisterClass SrcGPR = SrcReg;
 }
@@ -1862,13 +1910,15 @@ let isCommutable = 1, PostEncoderMethod
                           (ins GPR64:$Rn, GPR64:$Rm),
                           "umulh\t$Rd, $Rn, $Rm",
                           [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
-                          NoItinerary>;
+                          NoItinerary>,
+                 Sched<[WriteMAC, ReadMAC, ReadMAC]>;
 
   def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
                           (ins GPR64:$Rn, GPR64:$Rm),
                           "smulh\t$Rd, $Rn, $Rm",
                           [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
-                          NoItinerary>;
+                          NoItinerary>,
+                 Sched<[WriteMAC, ReadMAC, ReadMAC]>;
 }
 
 multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
@@ -1916,7 +1966,8 @@ def uimm16 : Operand<i32> {
 
 class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
   : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
-                   !strconcat(asmop, "\t$UImm16"), [], NoItinerary> {
+                   !strconcat(asmop, "\t$UImm16"), [], NoItinerary>,
+    Sched<[WriteBr]> {
   let isBranch = 1;
   let isTerminator = 1;
 }
@@ -1947,14 +1998,16 @@ def EXTRwwwi : A64I_extract<0b0, 0b000,
                             "extr\t$Rd, $Rn, $Rm, $LSB",
                             [(set i32:$Rd,
                                   (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteALU, ReadALU, ReadALU]>;
 def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
                             (outs GPR64:$Rd),
                             (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
                             "extr\t$Rd, $Rn, $Rm, $LSB",
                             [(set i64:$Rd,
                                   (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteALU, ReadALU, ReadALU]>;
 
 def : InstAlias<"ror $Rd, $Rs, $LSB",
                (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
@@ -2001,12 +2054,14 @@ def fpz64movi : Operand<i64>,
 multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
   def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
                           (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
-                          NoItinerary> {
+                          NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     let Defs = [NZCV];
   }
 
   def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
-                        (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
+                        (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     let Defs = [NZCV];
   }
 }
@@ -2035,7 +2090,8 @@ class A64I_fpccmpImpl<bits<2> type, bit
                 (outs),
                 (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
                 !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
-                [], NoItinerary> {
+                [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Defs = [NZCV];
 }
 
@@ -2053,9 +2109,10 @@ let Uses = [NZCV] in {
   def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
                                  (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
                                  "fcsel\t$Rd, $Rn, $Rm, $Cond",
-                                 [(set f32:$Rd, 
+                                 [(set f32:$Rd,
                                        (simple_select f32:$Rn, f32:$Rm))],
-                                 NoItinerary>;
+                                 NoItinerary>,
+                  Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 
   def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
@@ -2063,7 +2120,8 @@ let Uses = [NZCV] in {
                                  "fcsel\t$Rd, $Rn, $Rm, $Cond",
                                  [(set f64:$Rd,
                                        (simple_select f64:$Rn, f64:$Rm))],
-                                 NoItinerary>;
+                                 NoItinerary>,
+                  Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2081,18 +2139,22 @@ multiclass A64I_fpdp1sizes<bits<6> opcod
   def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
                      !strconcat(asmstr, "\t$Rd, $Rn"),
                      [(set f32:$Rd, (opnode f32:$Rn))],
-                     NoItinerary>;
+                     NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
                      !strconcat(asmstr, "\t$Rd, $Rn"),
                      [(set f64:$Rd, (opnode f64:$Rn))],
-                     NoItinerary>;
+                     NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm FMOV   : A64I_fpdp1sizes<0b000000, "fmov">;
 defm FABS   : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
 defm FNEG   : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
-defm FSQRT  : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
+  defm FSQRT  : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+}
 
 defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
 defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
@@ -2121,7 +2183,8 @@ class A64I_fpdp1_fcvt<FCVTRegType DestRe
                {0,0,0,1, DestReg.t1, DestReg.t0},
                (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
                "fcvt\t$Rd, $Rn",
-               [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>;
+               [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
 def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
@@ -2146,18 +2209,22 @@ multiclass A64I_fpdp2sizes<bits<4> opcod
                       (ins FPR32:$Rn, FPR32:$Rm),
                       !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
                       [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
-                      NoItinerary>;
+                      NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
   def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
                       (outs FPR64:$Rd),
                       (ins FPR64:$Rn, FPR64:$Rm),
                       !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
                       [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
-                      NoItinerary>;
+                      NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 }
 
 let isCommutable = 1 in {
-  defm FMUL   : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
+  let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
+    defm FMUL   : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
+  }
   defm FADD   : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
 
   // No patterns for these.
@@ -2166,12 +2233,16 @@ let isCommutable = 1 in {
   defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
   defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
 
-  defm FNMUL  : A64I_fpdp2sizes<0b1000, "fnmul",
-                                PatFrag<(ops node:$lhs, node:$rhs),
-                                        (fneg (fmul node:$lhs, node:$rhs))> >;
+  let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
+    defm FNMUL  : A64I_fpdp2sizes<0b1000, "fnmul",
+                                  PatFrag<(ops node:$lhs, node:$rhs),
+                                          (fneg (fmul node:$lhs, node:$rhs))> >;
+  }
 }
 
-defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in {
+  defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+}
 defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
 
 //===----------------------------------------------------------------------===//
@@ -2192,7 +2263,8 @@ class A64I_fpdp3Impl<string asmop, Regis
                (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
                !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
                [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
-               NoItinerary>;
+               NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>;
 
 def FMADDssss  : A64I_fpdp3Impl<"fmadd",  FPR32, f32, 0b00, 0b0, 0b0, fma>;
 def FMSUBssss  : A64I_fpdp3Impl<"fmsub",  FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
@@ -2271,14 +2343,15 @@ class cvtfix_i64_op<ValueType FloatVT>
 // worth going for a multiclass here. Oh well.
 
 class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
-                   RegisterClass GPR, RegisterClass FPR, 
-                   ValueType DstTy, ValueType SrcTy, 
+                   RegisterClass GPR, RegisterClass FPR,
+                   ValueType DstTy, ValueType SrcTy,
                    Operand scale_op, string asmop, SDNode cvtop>
   : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
                  (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
                  !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
                  [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
                              cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
@@ -2307,7 +2380,8 @@ class A64I_fixtofp<bit sf, bits<2> type,
                  (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
                  !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
                  [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
                             cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
@@ -2334,7 +2408,8 @@ def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0
 class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
                    RegisterClass DestPR, RegisterClass SrcPR, string asmop>
   : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
-               !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;
+               !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
   def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
@@ -2420,11 +2495,13 @@ def lane1 : Operand<i32> {
 let DecoderMethod =  "DecodeFMOVLaneInstruction" in {
   def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
                           (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
-                          "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
+                          "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
   def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
                           (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
-                          "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
+                          "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 let Predicates = [HasFPARMv8] in {
@@ -2471,7 +2548,8 @@ class A64I_fpimm_impl<bits<2> type, Regi
                (ins fmov_operand:$Imm8),
                "fmov\t$Rd, $Imm8",
                [(set VT:$Rd, fmov_operand:$Imm8)],
-               NoItinerary>;
+               NoItinerary>,
+    Sched<[WriteFPALU]>;
 
 def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
 def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
@@ -2520,7 +2598,8 @@ defm prefetch : namedimm<"prefetch", "A6
 class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
                       list<dag> patterns = []>
    : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
-                 "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+                 "ldr\t$Rt, $Imm19", patterns, NoItinerary>,
+     Sched<[WriteLd]>;
 
 let mayLoad = 1 in {
   def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
@@ -2541,12 +2620,14 @@ let mayLoad = 1 in {
                                (outs GPR64:$Rt),
                                (ins ldrlit_label:$Imm19),
                                "ldrsw\t$Rt, $Imm19",
-                               [], NoItinerary>;
+                               [], NoItinerary>,
+                   Sched<[WriteLd]>;
 
   def PRFM_lit : A64I_LDRlit<0b11, 0b0,
                              (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
                              "prfm\t$Rt, $Imm19",
-                             [], NoItinerary>;
+                             [], NoItinerary>,
+                 Sched<[WritePreLd]>; // prefetch, same class as PRFM/PRFUM
 }
 
 //===----------------------------------------------------------------------===//
@@ -2638,19 +2719,23 @@ class A64I_LRexs_impl<bits<2> size, bits
 multiclass A64I_LRex<string asmstr, bits<3> opcode> {
   def _byte:  A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
                             (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteLd]>;
 
   def _hword:  A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
                             (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteLd]>;
 
   def _word:  A64I_LRexs_impl<0b10, opcode, asmstr,
                             (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteLd]>;
 
   def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
                             (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteLd]>;
 }
 
 defm LDXR  : A64I_LRex<"ldxr",  0b000>;
@@ -2776,12 +2861,14 @@ multiclass A64I_LPex<string asmstr, bits
   def _word:  A64I_LPexs_impl<0b10, opcode, asmstr,
                             (outs GPR32:$Rt, GPR32:$Rt2),
                             (ins GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteLd]>;
 
   def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
                             (outs GPR64:$Rt, GPR64:$Rt2),
                             (ins GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteLd]>;
 }
 
 defm LDXP  : A64I_LPex<"ldxp", 0b010>;
@@ -3004,7 +3091,8 @@ multiclass A64I_LDRSTR_unsigned<string p
   def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
                       (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
                       "ldr" #  asmsuffix # "\t$Rt, [$Rn, $UImm12]",
-                      [], NoItinerary> {
+                      [], NoItinerary>,
+             Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
   }
   def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
@@ -3016,13 +3104,15 @@ multiclass A64I_LDRSTR_unsigned<string p
                             (outs GPR:$Rt),
                             (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
                             "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                            Sched<[WriteLd, ReadLd, ReadLd]>;
 
     def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
                             (outs GPR:$Rt),
                             (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
                             "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                            Sched<[WriteLd, ReadLd, ReadLd]>;
   }
   def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
         (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
@@ -3058,7 +3148,8 @@ multiclass A64I_LDRSTR_unsigned<string p
   def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
                              (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
                              "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
-                             [], NoItinerary> {
+                             [], NoItinerary>,
+              Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
   }
   def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
@@ -3081,7 +3172,8 @@ multiclass A64I_LDRSTR_unsigned<string p
                                     (outs GPR:$Rt, GPR64xsp:$Rn_wb),
                                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                                     "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
-                                    [], NoItinerary> {
+                                    [], NoItinerary>,
+                     Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3104,7 +3196,8 @@ multiclass A64I_LDRSTR_unsigned<string p
                                     (outs GPR:$Rt, GPR64xsp:$Rn_wb),
                                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                                     "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
-                                    [], NoItinerary> {
+                                    [], NoItinerary>,
+                    Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3164,7 +3257,8 @@ multiclass A64I_LDR_signed<bits<2> size,
                           (outs GPR32:$Rt),
                           (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
                           "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
-                          [], NoItinerary> {
+                          [], NoItinerary>,
+          Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
   }
   def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
@@ -3174,7 +3268,8 @@ multiclass A64I_LDR_signed<bits<2> size,
                           (outs GPR64:$Rt),
                           (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
                           "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
-                          [], NoItinerary> {
+                          [], NoItinerary>,
+          Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
   }
   def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
@@ -3186,25 +3281,29 @@ multiclass A64I_LDR_signed<bits<2> size,
                             (outs GPR32:$Rt),
                             (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
                             "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                         Sched<[WriteLd, ReadLd, ReadLd]>;
 
     def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
                             (outs GPR32:$Rt),
                             (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
                             "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                         Sched<[WriteLd, ReadLd, ReadLd]>;
 
     def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
                             (outs GPR64:$Rt),
                             (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
                             "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                         Sched<[WriteLd, ReadLd, ReadLd]>;
 
     def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
                             (outs GPR64:$Rt),
                             (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
                             "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                         Sched<[WriteLd, ReadLd, ReadLd]>;
   }
   def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
         (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
@@ -3221,13 +3320,15 @@ multiclass A64I_LDR_signed<bits<2> size,
                              (outs GPR32:$Rt),
                              (ins GPR64xsp:$Rn, simm9:$SImm9),
                              "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
-                             [], NoItinerary>;
+                             [], NoItinerary>,
+              Sched<[WriteLd, ReadLd]>;
 
     def x_U : A64I_LSunalimm<size, 0b0, 0b10,
                              (outs GPR64:$Rt),
                              (ins GPR64xsp:$Rn, simm9:$SImm9),
                              "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
-                             [], NoItinerary>;
+                             [], NoItinerary>,
+              Sched<[WriteLd, ReadLd]>;
 
 
     // Post-indexed
@@ -3235,7 +3336,8 @@ multiclass A64I_LDR_signed<bits<2> size,
                                  (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
-                                 [], NoItinerary> {
+                                 [], NoItinerary>,
+                    Sched<[WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3244,7 +3346,8 @@ multiclass A64I_LDR_signed<bits<2> size,
                                    (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
                                    (ins GPR64xsp:$Rn, simm9:$SImm9),
                                    "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
-                                   [], NoItinerary> {
+                                   [], NoItinerary>,
+                    Sched<[WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3254,7 +3357,8 @@ multiclass A64I_LDR_signed<bits<2> size,
                                  (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
-                                 [], NoItinerary> {
+                                 [], NoItinerary>,
+                   Sched<[WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3263,7 +3367,8 @@ multiclass A64I_LDR_signed<bits<2> size,
                                  (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
-                                 [], NoItinerary> {
+                                 [], NoItinerary>,
+                   Sched<[WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3281,7 +3386,8 @@ def LDRSWx
                     (outs GPR64:$Rt),
                     (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
                     "ldrsw\t$Rt, [$Rn, $UImm12]",
-                    [], NoItinerary> {
+                    [], NoItinerary>,
+      Sched<[WriteLd, ReadLd]> {
   let mayLoad = 1;
 }
 def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
@@ -3291,13 +3397,15 @@ let mayLoad = 1 in {
                              (outs GPR64:$Rt),
                              (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
                              "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
-                             [], NoItinerary>;
+                             [], NoItinerary>,
+                            Sched<[WriteLd, ReadLd, ReadLd]>;
 
   def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
                              (outs GPR64:$Rt),
                              (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
                              "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
-                             [], NoItinerary>;
+                             [], NoItinerary>,
+                            Sched<[WriteLd, ReadLd, ReadLd]>;
 }
 def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
                 (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
@@ -3308,7 +3416,8 @@ def LDURSWx
                     (outs GPR64:$Rt),
                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                     "ldursw\t$Rt, [$Rn, $SImm9]",
-                    [], NoItinerary> {
+                    [], NoItinerary>,
+      Sched<[WriteLd, ReadLd]> {
   let mayLoad = 1;
 }
 def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
@@ -3318,7 +3427,8 @@ def LDRSWx_PostInd
                     (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                     "ldrsw\t$Rt, [$Rn], $SImm9",
-                    [], NoItinerary> {
+                    [], NoItinerary>,
+      Sched<[WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3328,7 +3438,8 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10,
                                  (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrsw\t$Rt, [$Rn, $SImm9]!",
-                                 [], NoItinerary> {
+                                 [], NoItinerary>,
+                    Sched<[WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3341,7 +3452,8 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10,
 def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
                  (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
                  "prfm\t$Rt, [$Rn, $UImm12]",
-                 [], NoItinerary> {
+                 [], NoItinerary>,
+           Sched<[WritePreLd, ReadPreLd]> {
   let mayLoad = 1;
 }
 def : InstAlias<"prfm $Rt, [$Rn]",
@@ -3352,12 +3464,14 @@ let mayLoad = 1 in {
                                         (ins prefetch_op:$Rt, GPR64xsp:$Rn,
                                              GPR32:$Rm, dword_Wm_regext:$Ext),
                                         "prfm\t$Rt, [$Rn, $Rm, $Ext]",
-                                        [], NoItinerary>;
+                                        [], NoItinerary>,
+                          Sched<[WritePreLd, ReadPreLd]>;
   def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
                                         (ins prefetch_op:$Rt, GPR64xsp:$Rn,
                                              GPR64:$Rm, dword_Xm_regext:$Ext),
                                         "prfm\t$Rt, [$Rn, $Rm, $Ext]",
-                                        [], NoItinerary>;
+                                        [], NoItinerary>,
+                          Sched<[WritePreLd, ReadPreLd]>;
 }
 
 def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
@@ -3368,7 +3482,8 @@ def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
 def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
                          (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                          "prfum\t$Rt, [$Rn, $SImm9]",
-                         [], NoItinerary> {
+                         [], NoItinerary>,
+            Sched<[WritePreLd, ReadPreLd]> {
   let mayLoad = 1;
 }
 def : InstAlias<"prfum $Rt, [$Rn]",
@@ -3388,7 +3503,8 @@ multiclass A64I_LDTRSTTR<bits<2> size, s
   def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
                               (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                               "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
-                              [], NoItinerary> {
+                              [], NoItinerary>,
+                    Sched<[WriteSt, ReadSt, ReadSt]> { // store: $Rt, $Rn
     let mayStore = 1;
   }
 
@@ -3398,7 +3514,8 @@ multiclass A64I_LDTRSTTR<bits<2> size, s
   def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
                                (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
                                "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
-                               [], NoItinerary> {
+                               [], NoItinerary>,
+                    Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
   }
 
@@ -3427,13 +3544,15 @@ multiclass A64I_LDTR_signed<bits<2> size
                           (outs GPR32:$Rt),
                           (ins GPR64xsp:$Rn, simm9:$SImm9),
                           "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+            Sched<[WriteLd, ReadLd]>;
 
     def x : A64I_LSunpriv<size, 0b0, 0b10,
                           (outs GPR64:$Rt),
                           (ins GPR64xsp:$Rn, simm9:$SImm9),
                           "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+            Sched<[WriteLd, ReadLd]>;
   }
 
   def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
@@ -3454,7 +3573,8 @@ def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0
                             (outs GPR64:$Rt),
                             (ins GPR64xsp:$Rn, simm9:$SImm9),
                             "ldtrsw\t$Rt, [$Rn, $SImm9]",
-                            [], NoItinerary> {
+                            [], NoItinerary>,
+              Sched<[WriteLd, ReadLd]> {
   let mayLoad = 1;
 }
 def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
@@ -3516,7 +3636,8 @@ multiclass A64I_LSPsimple<bits<2> opc, b
                           Operand simm7, string prefix> {
   def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
                     (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
-                    "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+                    "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
+             Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { // $Rt, $Rt2, $Rn
     let mayStore = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3527,7 +3648,8 @@ multiclass A64I_LSPsimple<bits<2> opc, b
   def _LDR : A64I_LSPoffset<opc, v, 0b1,
                             (outs SomeReg:$Rt, SomeReg:$Rt2),
                             (ins GPR64xsp:$Rn, simm7:$SImm7),
-                            "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+                            "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
+             Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3553,7 +3675,8 @@ multiclass A64I_LSPsimple<bits<2> opc, b
                         (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
                         (ins GPR64xsp:$Rn, simm7:$SImm7),
                         "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
-                        [], NoItinerary> {
+                        [], NoItinerary>,
+                     Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -3572,7 +3695,8 @@ multiclass A64I_LSPsimple<bits<2> opc, b
                               (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
                               (ins GPR64xsp:$Rn, simm7:$SImm7),
                               "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
-                              [], NoItinerary> {
+                              [], NoItinerary>,
+                    Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -3591,7 +3715,8 @@ multiclass A64I_LSPsimple<bits<2> opc, b
   def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
                             (outs SomeReg:$Rt, SomeReg:$Rt2),
                             (ins GPR64xsp:$Rn, simm7:$SImm7),
-                            "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+                            "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
+                     Sched<[WriteLd, ReadLd]> {
     let mayLoad = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3616,7 +3741,8 @@ defm LSFPPair128 : A64I_LSPsimple<0b10,
 def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
                            (outs GPR64:$Rt, GPR64:$Rt2),
                            (ins GPR64xsp:$Rn, word_simm7:$SImm7),
-                           "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+                           "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
+             Sched<[WriteLd, ReadLd]> {
   let mayLoad = 1;
   let DecoderMethod = "DecodeLDSTPairInstruction";
 }
@@ -3637,7 +3763,8 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01,
                                    (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
                                    (ins GPR64xsp:$Rn, word_simm7:$SImm7),
                                    "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
-                                   [], NoItinerary> {
+                                   [], NoItinerary>,
+                    Sched<[WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -3682,14 +3809,16 @@ multiclass A64I_logimmSizes<bits<2> opc,
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                          [(set i32:$Rd,
                                (opnode i32:$Rn, logical_imm32_operand:$Imm))],
-                         NoItinerary>;
+                         NoItinerary>,
+            Sched<[WriteALU, ReadALU]>;
 
   def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
                          (ins GPR64:$Rn, logical_imm64_operand:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                          [(set i64:$Rd,
                                (opnode i64:$Rn, logical_imm64_operand:$Imm))],
-                         NoItinerary>;
+                         NoItinerary>,
+            Sched<[WriteALU, ReadALU]>;
 }
 
 defm AND : A64I_logimmSizes<0b00, "and", and>;
@@ -3700,12 +3829,14 @@ let Defs = [NZCV] in {
   def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
                                 (ins GPR32:$Rn, logical_imm32_operand:$Imm),
                                 "ands\t$Rd, $Rn, $Imm",
-                                [], NoItinerary>;
+                                [], NoItinerary>,
+                Sched<[WriteALU, ReadALU]>;
 
   def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
                                 (ins GPR64:$Rn, logical_imm64_operand:$Imm),
                                 "ands\t$Rd, $Rn, $Imm",
-                                [], NoItinerary>;
+                                [], NoItinerary>,
+                Sched<[WriteALU, ReadALU]>;
 }
 
 
@@ -3750,7 +3881,8 @@ multiclass logical_shifts<string prefix,
                        [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
                             !cast<Operand>("lsl_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
   def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
                        (outs GPR:$Rd),
@@ -3760,7 +3892,8 @@ multiclass logical_shifts<string prefix,
                        [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
                             !cast<Operand>("lsr_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
   def _asr : A64I_logicalshift<sf, opc, 0b10, N,
                        (outs GPR:$Rd),
@@ -3770,7 +3903,8 @@ multiclass logical_shifts<string prefix,
                        [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
                             !cast<Operand>("asr_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
   def _ror : A64I_logicalshift<sf, opc, 0b11, N,
                        (outs GPR:$Rd),
@@ -3780,7 +3914,8 @@ multiclass logical_shifts<string prefix,
                        [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
                             !cast<Operand>("ror_operand_" # ty):$Imm6))
                        )],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
   }
 
   def _noshift
@@ -3835,7 +3970,8 @@ multiclass tst_shifts<string prefix, bit
                        [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
                            !cast<Operand>("lsl_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
 
   def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
@@ -3846,7 +3982,8 @@ multiclass tst_shifts<string prefix, bit
                        [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
                            !cast<Operand>("lsr_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
   def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
                        (outs),
@@ -3856,7 +3993,8 @@ multiclass tst_shifts<string prefix, bit
                        [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
                            !cast<Operand>("asr_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
   def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
                        (outs),
@@ -3866,7 +4004,8 @@ multiclass tst_shifts<string prefix, bit
                        [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
                            !cast<Operand>("ror_operand_" # ty):$Imm6)),
                                           0, signed_cond))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
   }
 
   def _noshift : InstAlias<"tst $Rn, $Rm",
@@ -3889,7 +4028,8 @@ multiclass mvn_shifts<string prefix, bit
                        "mvn\t$Rd, $Rm, $Imm6",
                        [(set ty:$Rd, (not (shl ty:$Rm,
                          !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
 
   def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
@@ -3899,7 +4039,8 @@ multiclass mvn_shifts<string prefix, bit
                        "mvn\t$Rd, $Rm, $Imm6",
                        [(set ty:$Rd, (not (srl ty:$Rm,
                          !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
   def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
                        (outs GPR:$Rd),
@@ -3908,7 +4049,8 @@ multiclass mvn_shifts<string prefix, bit
                        "mvn\t$Rd, $Rm, $Imm6",
                        [(set ty:$Rd, (not (sra ty:$Rm,
                          !cast<Operand>("asr_operand_" # ty):$Imm6)))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
 
   def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
                        (outs GPR:$Rd),
@@ -3917,7 +4059,8 @@ multiclass mvn_shifts<string prefix, bit
                        "mvn\t$Rd, $Rm, $Imm6",
                        [(set ty:$Rd, (not (rotr ty:$Rm,
                          !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
-                       NoItinerary>;
+                       NoItinerary>,
+             Sched<[WriteALU, ReadALU, ReadALU]>;
   }
 
   def _noshift : InstAlias<"mvn $Rn, $Rm",
@@ -3972,7 +4115,8 @@ multiclass A64I_movwSizes<bits<2> opc, s
 
   def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
                       !strconcat(asmop, "\t$Rd, $FullImm"),
-                      [], NoItinerary> {
+                      [], NoItinerary>,
+            Sched<[WriteALU]> {
     bits<18> FullImm;
     let UImm16 = FullImm{15-0};
     let Shift = FullImm{17-16};
@@ -3980,7 +4124,8 @@ multiclass A64I_movwSizes<bits<2> opc, s
 
   def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
                       !strconcat(asmop, "\t$Rd, $FullImm"),
-                      [], NoItinerary> {
+                      [], NoItinerary>,
+            Sched<[WriteALU]> {
     bits<18> FullImm;
     let UImm16 = FullImm{15-0};
     let Shift = FullImm{17-16};
@@ -4088,10 +4233,12 @@ def adrp_label : Operand<i64> {
 
 let hasSideEffects = 0 in {
   def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
-                         "adr\t$Rd, $Label", [], NoItinerary>;
+                         "adr\t$Rd, $Label", [], NoItinerary>,
+              Sched<[WriteALUs]>;
 
   def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
-                          "adrp\t$Rd, $Label", [], NoItinerary>;
+                          "adrp\t$Rd, $Label", [], NoItinerary>,
+               Sched<[WriteALUs]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4377,14 +4524,16 @@ let isBranch = 1, isTerminator = 1 in {
                         "tbz\t$Rt, $Imm, $Label",
                         [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
                                    A64eq, bb:$Label)],
-                        NoItinerary>;
+                        NoItinerary>,
+               Sched<[WriteBr]>;
 
   def TBNZxii : A64I_TBimm<0b1, (outs),
                         (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
                         "tbnz\t$Rt, $Imm, $Label",
                         [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
                                    A64ne, bb:$Label)],
-                        NoItinerary>;
+                        NoItinerary>,
+                Sched<[WriteBr]>;
 
 
   // Note, these instructions overlap with the above 64-bit patterns. This is
@@ -4396,7 +4545,8 @@ let isBranch = 1, isTerminator = 1 in {
                         "tbz\t$Rt, $Imm, $Label",
                         [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
                                    A64eq, bb:$Label)],
-                        NoItinerary> {
+                        NoItinerary>,
+               Sched<[WriteBr]> {
     let Imm{5} = 0b0;
   }
 
@@ -4405,7 +4555,8 @@ let isBranch = 1, isTerminator = 1 in {
                         "tbnz\t$Rt, $Imm, $Label",
                         [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
                                    A64ne, bb:$Label)],
-                        NoItinerary> {
+                        NoItinerary>,
+                Sched<[WriteBr]> {
     let Imm{5} = 0b0;
   }
 }
@@ -4440,7 +4591,8 @@ def blimm_target : Operand<i64> {
 class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
   : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
               !strconcat(asmop, "\t$Label"), patterns,
-              NoItinerary>;
+              NoItinerary>,
+    Sched<[WriteBr]>;
 
 let isBranch = 1 in {
   def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
@@ -4448,10 +4600,12 @@ let isBranch = 1 in {
     let isBarrier = 1;
   }
 
-  def BLimm : A64I_BimmImpl<0b1, "bl",
-                            [(AArch64Call tglobaladdr:$Label)], blimm_target> {
-    let isCall = 1;
-    let Defs = [X30];
+  let SchedRW = [WriteBrL] in {
+    def BLimm : A64I_BimmImpl<0b1, "bl",
+                              [(AArch64Call tglobaladdr:$Label)], blimm_target> {
+      let isCall = 1;
+      let Defs = [X30];
+    }
   }
 }
 
@@ -4468,7 +4622,8 @@ class A64I_BregImpl<bits<4> opc,
                     dag outs, dag ins, string asmstr, list<dag> patterns,
                     InstrItinClass itin = NoItinerary>
   : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
-              outs, ins, asmstr, patterns, itin> {
+              outs, ins, asmstr, patterns, itin>,
+    Sched<[WriteBr]> {
   let isBranch         = 1;
   let isIndirectBranch = 1;
 }
@@ -4484,11 +4639,13 @@ let isBranch = 1 in {
     let isTerminator = 1;
   }
 
-  def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
-                           "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
-    let isBarrier = 0;
-    let isCall = 1;
-    let Defs = [X30];
+  let SchedRW = [WriteBrL] in {
+    def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
+                             "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
+      let isBarrier = 0;
+      let isCall = 1;
+      let Defs = [X30];
+    }
   }
 
   def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),

Modified: llvm/trunk/lib/Target/AArch64/AArch64Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Schedule.td?rev=202767&r1=202766&r2=202767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Schedule.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Schedule.td Mon Mar  3 17:32:47 2014
@@ -7,4 +7,66 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// Generic processor itineraries for legacy compatibility.
+
 def GenericItineraries : ProcessorItineraries<[], [], []>;
+
+
+//===----------------------------------------------------------------------===//
+// Base SchedReadWrite types
+
+// Basic ALU
+def WriteALU : SchedWrite;  // Generic: may contain shift and/or ALU operation
+def WriteALUs : SchedWrite; // Shift only with no ALU operation
+def ReadALU : SchedRead;    // Operand not needed for shifting
+def ReadALUs : SchedRead;   // Operand needed for shifting
+
+// Multiply with optional accumulate
+def WriteMAC : SchedWrite;
+def ReadMAC : SchedRead;
+
+// Compares
+def WriteCMP : SchedWrite;
+def ReadCMP : SchedRead;
+
+// Division
+def WriteDiv : SchedWrite;
+def ReadDiv : SchedRead;
+
+// Loads
+def WriteLd : SchedWrite;
+def WritePreLd : SchedWrite;
+def ReadLd : SchedRead;
+def ReadPreLd : SchedRead;
+
+// Branches
+def WriteBr : SchedWrite;
+def WriteBrL : SchedWrite;
+def ReadBr : SchedRead;
+
+// Floating Point ALU
+def WriteFPALU : SchedWrite;
+def ReadFPALU : SchedRead;
+
+// Floating Point MAC, Mul, Div, Sqrt
+//   Most processors will simply send all of these down a dedicated pipe, but
+//   they're explicitly separated here for flexibility of modeling later. May
+//   consider consolidating them into a single WriteFPXXXX type in the future.
+def WriteFPMAC : SchedWrite;
+def WriteFPMul : SchedWrite;
+def WriteFPDiv : SchedWrite;
+def WriteFPSqrt : SchedWrite;
+def ReadFPMAC : SchedRead;
+def ReadFPMul : SchedRead;
+def ReadFPDiv : SchedRead;
+def ReadFPSqrt : SchedRead;
+
+// Noop
+def WriteNoop : SchedWrite;
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget specific Machine Models.
+
+include "AArch64ScheduleA53.td"

Added: llvm/trunk/lib/Target/AArch64/AArch64ScheduleA53.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ScheduleA53.td?rev=202767&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ScheduleA53.td (added)
+++ llvm/trunk/lib/Target/AArch64/AArch64ScheduleA53.td Mon Mar  3 17:32:47 2014
@@ -0,0 +1,130 @@
+//=- AArch64ScheduleA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-A53 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedModel.h for details.
+
+// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
+def CortexA53Model : SchedMachineModel {
+  let IssueWidth = 2;  // 2 micro-ops are dispatched per cycle.
+  let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency.
+  let LoadLatency = 2; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation
+                             // Specification - Instruction Timings"
+                             // v 1.0 Spreadsheet
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource using the default BufferSize = -1.
+// Cortex-A53 is in-order and therefore should be using BufferSize = 0. The
+// current configuration performs better with the basic latencies provided so
+// far. Will revisit BufferSize once the latency information is more accurate.
+
+let SchedModel = CortexA53Model in {
+
+def A53UnitALU    : ProcResource<2>;                        // Int ALU
+def A53UnitMAC    : ProcResource<1>;                        // Int MAC
+def A53UnitDiv    : ProcResource<1>;                        // Int Division
+def A53UnitLdSt   : ProcResource<1>;                        // Load/Store
+def A53UnitB      : ProcResource<1>;                        // Branch
+def A53UnitFPALU  : ProcResource<1>;                        // FP ALU
+def A53UnitFPMDS  : ProcResource<1>;                        // FP Mult/Div/Sqrt
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+// Issue - Every instruction must consume an A53WriteIssue. Optionally,
+//         instructions that cannot be dual-issued will also include the
+//         A53WriteIssue2nd in their SchedRW list. That second WriteRes will
+//         ensure that a second issue slot is consumed.
+def A53WriteIssue : SchedWriteRes<[]>;
+def A53WriteIssue2nd : SchedWriteRes<[]> { let Latency = 0; }
+
+// ALU - These are reduced to 1 despite a true latency of 4 in order to easily
+//       model forwarding logic. Once forwarding is properly modelled, then
+//       they'll be corrected.
+def : WriteRes<WriteALU, [A53UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteALUs, [A53UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteCMP, [A53UnitALU]> { let Latency = 1; }
+
+// MAC
+def : WriteRes<WriteMAC, [A53UnitMAC]> { let Latency = 4; }
+
+// Div
+def : WriteRes<WriteDiv, [A53UnitDiv]> { let Latency = 4; }
+
+// Load
+def : WriteRes<WriteLd, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WritePreLd, [A53UnitLdSt]> { let Latency = 4; }
+
+// Branch
+def : WriteRes<WriteBr, [A53UnitB]>;
+def : WriteRes<WriteBrL, [A53UnitB]>;
+
+// FP ALU
+def : WriteRes<WriteFPALU, [A53UnitFPALU]> {let Latency = 6; }
+
+// FP MAC, Mul, Div, Sqrt
+//   Using Double Precision numbers for now as a worst case. Additionally, not
+//   modeling the exact hazard but instead treating the whole pipe as a hazard.
+//   As an example VMUL, VMLA, and others are actually pipelined. VDIV and VSQRT
+//   have a total latency of 33 and 32 respectively but only a hazard of 29 and
+//   28 (double-precision example).
+def : WriteRes<WriteFPMAC, [A53UnitFPMDS]> { let Latency = 10; }
+def : WriteRes<WriteFPMul, [A53UnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFPDiv, [A53UnitFPMDS]> { let Latency = 33;
+                                             let ResourceCycles = [29]; }
+def : WriteRes<WriteFPSqrt, [A53UnitFPMDS]> { let Latency = 32;
+                                              let ResourceCycles = [28]; }
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// No forwarding defined for ReadALU yet.
+def : ReadAdvance<ReadALU, 0>;
+
+// No forwarding defined for ReadCMP yet.
+def : ReadAdvance<ReadCMP, 0>;
+
+// No forwarding defined for ReadBr yet.
+def : ReadAdvance<ReadBr, 0>;
+
+// No forwarding defined for ReadMAC yet.
+def : ReadAdvance<ReadMAC, 0>;
+
+// No forwarding defined for ReadDiv yet.
+def : ReadAdvance<ReadDiv, 0>;
+
+// No forwarding defined for ReadLd, ReadPreLd yet.
+def : ReadAdvance<ReadLd, 0>;
+def : ReadAdvance<ReadPreLd, 0>;
+
+// No forwarding defined for ReadFPALU yet.
+def : ReadAdvance<ReadFPALU, 0>;
+
+// No forwarding defined for ReadFPMAC/Mul/Div/Sqrt yet.
+def : ReadAdvance<ReadFPMAC, 0>;
+def : ReadAdvance<ReadFPMul, 0>;
+def : ReadAdvance<ReadFPDiv, 0>;
+def : ReadAdvance<ReadFPSqrt, 0>;
+
+}

Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=202767&r1=202766&r2=202767&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Mon Mar  3 17:32:47 2014
@@ -29,6 +29,11 @@ class GlobalValue;
 class AArch64Subtarget : public AArch64GenSubtargetInfo {
   virtual void anchor();
 protected:
+  enum ARMProcFamilyEnum {Others, CortexA53, CortexA57};
+
+  /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
+  ARMProcFamilyEnum ARMProcFamily;
+
   bool HasFPARMv8;
   bool HasNEON;
   bool HasCrypto;

Added: llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll?rev=202767&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll Mon Mar  3 17:32:47 2014
@@ -0,0 +1,83 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=aarch64 -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
+; much higher than the ADD instructions in order to hide latency. When not
+; specifying a subtarget, the MADD will remain near the end of the block.
+; CHECK: main
+; CHECK: *** Final schedule for BB#2 ***
+; CHECK: SU(13)
+; CHECK: MADDwwww
+; CHECK: SU(4)
+; CHECK: ADDwwi_lsl0_s
+; CHECK: ********** MI Scheduling **********
+@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
+@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca [8 x i32], align 4
+  %y = alloca [8 x i32], align 4
+  %i = alloca i32, align 4
+  %xx = alloca i32, align 4
+  %yy = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = bitcast [8 x i32]* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
+  %1 = bitcast [8 x i32]* %y to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
+  store i32 0, i32* %xx, align 4
+  store i32 0, i32* %yy, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %2 = load i32* %i, align 4
+  %cmp = icmp slt i32 %2, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %3 = load i32* %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
+  %4 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %4, 1
+  store i32 %add, i32* %xx, align 4
+  %5 = load i32* %xx, align 4
+  %add1 = add nsw i32 %5, 12
+  store i32 %add1, i32* %xx, align 4
+  %6 = load i32* %xx, align 4
+  %add2 = add nsw i32 %6, 23
+  store i32 %add2, i32* %xx, align 4
+  %7 = load i32* %xx, align 4
+  %add3 = add nsw i32 %7, 34
+  store i32 %add3, i32* %xx, align 4
+  %8 = load i32* %i, align 4
+  %idxprom4 = sext i32 %8 to i64
+  %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
+  %9 = load i32* %arrayidx5, align 4
+  %10 = load i32* %yy, align 4
+  %mul = mul nsw i32 %10, %9
+  store i32 %mul, i32* %yy, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %11 = load i32* %i, align 4
+  %inc = add nsw i32 %11, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %12 = load i32* %xx, align 4
+  %13 = load i32* %yy, align 4
+  %add6 = add nsw i32 %12, %13
+  ret i32 %add6
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }