[llvm] r205090 - ARM64: initial backend import

Tim Northover tnorthover at apple.com
Sat Mar 29 03:18:15 PDT 2014


Added: llvm/trunk/lib/Target/ARM64/ARM64PerfectShuffle.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64PerfectShuffle.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64PerfectShuffle.h (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64PerfectShuffle.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,6586 @@
+//===-- ARM64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using AdvSIMD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+  135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+  1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+  2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+  2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+  1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+  2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+  2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+  2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+  135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+  2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+  1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+  1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+  2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+  2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+  2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+  3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+  2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+  1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+  3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+  3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+  1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+  2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+  3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+  3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+  2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+  2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+  1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+  2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+  2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+  2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+  2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+  2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+  3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+  3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+  3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+  2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+  2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+  2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+  3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+  3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+  3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+  1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+  2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+  3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+  1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+  2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+  2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+  3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+  3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+  2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+  2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+  2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+  2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+  2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+  2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+  3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+  2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+  3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+  3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+  3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+  2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+  2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+  2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+  2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+  3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+  3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+  3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+  2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+  3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+  3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+  2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+  2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+  135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+  1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+  1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+  2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+  1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+  1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+  2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+  2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+  135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+  2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+  1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+  2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+  2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+  2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+  4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+  2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+  2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+  1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+  1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+  2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+  2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+  2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+  1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+  2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+  2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+  2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+  1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+  1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+  3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+  2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+  835584U, // <0,1,2,3>: Cost 0 copy LHS
+  1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+  3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+  2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+  1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+  835584U, // <0,1,2,u>: Cost 0 copy LHS
+  2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+  2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+  2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+  2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+  2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+  2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+  2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+  2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+  2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+  2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+  4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+  2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+  2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+  2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+  1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+  2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+  2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+  1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+  3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+  2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+  3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+  2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+  2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+  2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+  2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+  2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+  2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+  2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+  3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+  2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+  3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+  2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+  3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+  2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+  1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+  1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+  2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+  2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+  2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+  2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+  2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+  3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+  2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+  2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+  2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+  1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+  1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+  2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+  835584U, // <0,1,u,3>: Cost 0 copy LHS
+  1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+  1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+  2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+  1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+  835584U, // <0,1,u,u>: Cost 0 copy LHS
+  2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+  1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+  1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+  2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+  2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+  2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+  2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+  2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+  1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+  2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+  2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+  2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+  2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+  2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+  2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+  2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+  3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+  2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+  1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+  2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+  2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+  3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+  1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+  2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+  2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+  2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+  1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+  2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+  2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+  3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+  2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+  2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+  2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+  3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+  2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+  2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+  2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+  4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+  2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+  2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+  2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+  1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+  1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+  2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+  1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+  2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+  2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+  3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+  3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+  2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+  2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+  2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+  2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+  2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+  2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+  3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+  2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+  2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+  2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+  3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+  2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+  2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+  2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+  2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+  2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+  3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+  2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+  2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+  2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+  3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+  2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+  2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+  1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+  1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+  1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+  3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+  1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+  1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+  1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+  2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+  1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+  2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+  2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+  2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+  4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+  2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+  4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+  3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+  3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+  2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+  2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+  2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+  2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+  2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+  2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+  4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+  3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+  3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+  2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+  1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+  1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+  2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+  3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+  1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+  3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+  2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+  2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+  1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+  3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+  2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+  3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+  2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+  3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+  3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+  3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+  3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+  2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+  3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+  2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+  2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+  4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+  3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+  2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+  3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+  3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+  2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+  3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+  3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+  4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+  3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+  2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+  3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+  2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+  2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+  2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+  2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+  3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+  3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+  3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+  3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+  3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+  3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+  2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+  2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+  3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+  2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+  3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+  3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+  3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+  3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+  3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+  3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+  2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+  1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+  1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+  2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+  3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+  1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+  3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+  2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+  2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+  1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+  2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+  2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+  2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+  3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+  2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+  2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+  3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+  3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+  3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+  2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+  3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+  3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+  2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+  2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+  1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+  3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+  2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+  1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+  2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+  2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+  3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+  2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+  2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+  2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+  1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+  2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+  1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+  3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+  3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+  3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+  3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+  3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+  3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+  4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+  3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+  3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+  2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+  3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+  3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+  3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+  2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+  2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+  2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+  3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+  2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+  2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+  2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+  3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+  2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+  2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+  2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+  1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+  2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+  1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+  2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+  3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+  2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+  3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+  2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+  3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+  2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+  2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+  2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+  3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+  3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+  3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+  3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+  3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+  2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+  2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+  3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+  2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+  2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+  2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+  2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+  2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+  2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+  1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+  1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+  2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+  1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+  3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+  2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+  3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+  3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+  3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+  3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+  2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+  3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+  2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+  1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+  2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+  2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+  2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+  1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+  2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+  2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+  2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+  1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+  2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+  2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+  3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+  2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+  2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+  3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+  4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+  3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+  3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+  3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+  3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+  3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+  3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+  2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+  3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+  3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+  2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+  2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+  2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+  3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+  3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+  3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+  3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+  2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+  3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+  2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+  2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+  3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+  3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+  3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+  3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+  3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+  2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+  2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+  2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+  2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+  2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+  3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+  3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+  3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+  2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+  2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+  3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+  1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+  1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+  2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+  2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+  2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+  3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+  2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+  2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+  2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+  2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+  2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+  1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+  2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+  2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+  2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+  1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+  2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+  2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+  1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+  1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+  2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+  2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+  2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+  3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+  2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+  2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+  2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+  2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+  2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+  2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+  3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+  2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+  3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+  2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+  2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+  2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+  2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+  2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+  1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+  2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+  2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+  2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+  1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+  2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+  3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+  2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+  1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+  3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+  3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+  3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+  3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+  2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+  3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+  3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+  2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+  2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+  2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+  4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+  2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+  3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+  2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+  2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+  2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+  4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+  2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+  2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+  3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+  3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+  3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+  3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+  3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+  3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+  4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+  2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+  3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+  3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+  3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+  3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+  3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+  3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+  2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+  2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+  2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+  2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+  2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+  3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+  2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+  2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+  2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+  3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+  2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+  2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+  1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+  2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+  2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+  2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+  1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+  2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+  2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+  2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+  1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+  2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+  2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+  2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+  3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+  2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+  2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+  2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+  3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+  2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+  2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+  3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+  2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+  3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+  2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+  3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+  3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+  2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+  2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+  2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+  1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+  3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+  2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+  2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+  2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+  2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+  3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+  1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+  3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+  3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+  3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+  3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+  3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+  3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+  2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+  3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+  2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+  3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+  2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+  2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+  3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+  3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+  2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+  3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+  3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+  2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+  3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+  3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+  3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+  3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+  2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+  3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+  2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+  2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+  2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+  3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+  2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+  3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+  3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+  3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+  2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+  3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+  2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+  2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+  3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+  3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+  3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+  3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+  3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+  3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+  3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+  2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+  2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+  2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+  1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+  2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+  2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+  2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+  2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+  2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+  2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+  1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+  135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+  1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+  1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+  2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+  1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+  2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+  3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+  2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+  135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+  1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+  1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+  1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+  2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+  1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+  1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+  2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+  2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+  1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+  1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+  1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+  1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+  835584U, // <0,u,2,3>: Cost 0 copy LHS
+  1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+  3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+  1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+  1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+  835584U, // <0,u,2,u>: Cost 0 copy LHS
+  2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+  2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+  2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+  2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+  2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+  2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+  2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+  2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+  2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+  2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+  2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+  3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+  2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+  2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+  1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+  1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+  2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+  1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+  2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+  2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+  3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+  2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+  2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+  2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+  1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+  2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+  1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+  2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+  2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+  2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+  2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+  2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+  2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+  2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+  1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+  1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+  2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+  2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+  2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+  2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+  2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+  2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+  2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+  2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+  2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+  135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+  1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+  1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+  835584U, // <0,u,u,3>: Cost 0 copy LHS
+  1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+  1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+  1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+  1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+  835584U, // <0,u,u,u>: Cost 0 copy LHS
+  2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+  1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+  2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+  2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+  2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+  2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+  3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+  3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+  1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+  2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+  2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+  1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+  3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+  2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+  2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+  2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+  3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+  1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+  2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+  2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+  2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+  2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+  2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+  3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+  2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+  2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+  2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+  3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+  3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+  67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+  2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+  2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+  4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+  3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+  2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+  68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+  2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+  2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+  2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+  3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+  3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+  2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+  3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+  3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+  2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+  4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+  2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+  3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+  3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+  2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+  3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+  3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+  3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+  3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+  3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+  2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+  3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+  3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+  3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+  3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+  3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+  2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+  2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+  2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+  4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+  2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+  3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+  3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+  3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+  2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+  3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+  2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+  3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+  1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+  67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+  2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+  2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+  2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+  2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+  2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+  68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+  1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+  1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+  2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+  2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+  2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+  2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+  3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+  3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+  1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+  1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+  202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+  2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+  2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+  1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+  2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+  2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+  2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+  202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+  2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+  2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+  2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+  2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+  2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+  3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+  2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+  3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+  2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+  2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+  3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+  4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+  2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+  2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+  2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+  3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+  2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+  2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+  2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+  2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+  3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+  3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+  2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+  1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+  2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+  3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+  1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+  2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+  2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+  4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+  2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+  2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+  2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+  2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+  2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+  2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+  3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+  2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+  2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+  3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+  3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+  4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+  2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+  2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+  2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+  2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+  2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+  3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+  4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+  2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+  3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+  3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+  2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+  2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+  1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+  202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+  2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+  2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+  1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+  1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+  2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+  2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+  202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+  2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+  1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+  2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+  2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+  2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+  3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+  2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+  2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+  1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+  2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+  2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+  2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+  2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+  2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+  2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+  2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+  3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+  2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+  2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+  2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+  2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+  2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+  3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+  3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+  2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+  2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+  2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+  403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+  1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+  1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+  403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+  1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+  1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+  403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+  2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+  3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+  3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+  2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+  2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+  1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+  2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+  2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+  1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+  2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+  2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+  3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+  2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+  2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+  2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+  2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+  2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+  2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+  2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+  2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+  2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+  2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+  3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+  3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+  2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+  2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+  2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+  1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+  2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+  3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+  2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+  2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+  3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+  3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+  2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+  1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+  403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+  1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+  1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+  403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+  1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+  1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+  403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+  2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+  1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+  2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+  2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+  2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+  3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+  3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+  3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+  1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+  2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+  2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+  2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+  1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+  2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+  2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+  3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+  2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+  1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+  2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+  2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+  2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+  2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+  2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+  3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+  2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+  3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+  2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+  1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+  1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+  2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+  2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+  1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+  2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+  2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+  3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+  1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+  2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+  2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+  3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+  2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+  2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+  1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+  2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+  3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+  1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+  2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+  2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+  2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+  2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+  2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+  2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+  2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+  1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+  1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+  2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+  3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+  2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+  3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+  2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+  3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+  2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+  2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+  2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+  2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+  2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+  3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+  2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+  2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+  2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+  3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+  2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+  2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+  1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+  1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+  2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+  1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+  1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+  1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+  2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+  1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+  1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+  2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+  2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+  3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+  2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+  2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+  1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+  2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+  3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+  1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+  3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+  2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+  3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+  3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+  3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+  2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+  3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+  3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+  2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+  3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+  3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+  3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+  2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+  3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+  2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+  3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+  3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+  2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+  2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+  2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+  2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+  3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+  2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+  3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+  3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+  3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+  3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+  2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+  3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+  3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+  3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+  2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+  2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+  2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+  3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+  2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+  2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+  2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+  2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+  3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+  2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+  2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+  1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+  1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+  2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+  2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+  3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+  2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+  2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+  3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+  3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+  2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+  2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+  2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+  3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+  3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+  3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+  3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+  3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+  2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+  3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+  2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+  2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+  2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+  2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+  2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+  2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+  1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+  1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+  1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+  2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+  1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+  2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+  3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+  1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+  2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+  2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+  3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+  1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+  2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+  2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+  2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+  2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+  2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+  2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+  2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+  3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+  2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+  3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+  2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+  2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+  2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+  3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+  3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+  2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+  4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+  2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+  2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+  2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+  3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+  2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+  2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+  3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+  4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+  2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+  2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+  2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+  2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+  3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+  3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+  2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+  1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+  2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+  2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+  1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+  2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+  2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+  3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+  4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+  2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+  2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+  2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+  2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+  2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+  2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+  2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+  2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+  3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+  3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+  3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+  2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+  2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+  2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+  2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+  2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+  2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+  2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+  2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+  2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+  3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+  2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+  2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+  2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+  1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+  2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+  2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+  1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+  1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+  2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+  2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+  2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+  3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+  2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+  2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+  3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+  2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+  2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+  2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+  4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+  2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+  3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+  2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+  3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+  3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+  2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+  3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+  3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+  2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+  2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+  2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+  3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+  3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+  3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+  3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+  3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+  2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+  2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+  2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+  2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+  3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+  2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+  3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+  2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+  4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+  2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+  3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+  3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+  3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+  3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+  3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+  3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+  3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+  2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+  3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+  4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+  2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+  3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+  3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+  3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+  3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+  3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+  3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+  2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+  2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+  2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+  2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+  3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+  3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+  3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+  3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+  3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+  2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+  2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+  2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+  1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+  2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+  2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+  3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+  2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+  2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+  2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+  4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+  1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+  1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+  2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+  3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+  3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+  2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+  2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+  2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+  3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+  1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+  2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+  2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+  3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+  3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+  2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+  3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+  2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+  2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+  2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+  2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+  2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+  3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+  2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+  2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+  2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+  2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+  2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+  2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+  2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+  3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+  3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+  2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+  2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+  3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+  2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+  3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+  2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+  1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+  2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+  2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+  2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+  1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+  1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+  2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+  2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+  1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+  2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+  3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+  3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+  3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+  3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+  2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+  3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+  3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+  2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+  2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+  2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+  3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+  2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+  2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+  4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+  2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+  3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+  2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+  2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+  2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+  3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+  3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+  3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+  3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+  3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+  2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+  2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+  2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+  3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+  3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+  3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+  3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+  2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+  3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+  2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+  2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+  1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+  2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+  2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+  2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+  1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+  1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+  2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+  2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+  1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+  1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+  1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+  2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+  2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+  1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+  1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+  2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+  2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+  1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+  1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+  202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+  1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+  1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+  1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+  2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+  3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+  2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+  202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+  2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+  2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+  2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+  2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+  2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+  2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+  2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+  2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+  2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+  403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+  1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+  2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+  403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+  1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+  1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+  403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+  2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+  2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+  2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+  2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+  2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+  1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+  2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+  2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+  1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+  2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+  2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+  3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+  2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+  2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+  2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+  1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+  1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+  2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+  2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+  2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+  2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+  2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+  3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+  2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+  2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+  2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+  1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+  2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+  2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+  2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+  2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+  2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+  2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+  2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+  1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+  403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+  202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+  115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+  1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+  403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+  1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+  1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+  403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+  2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+  2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+  1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+  2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+  2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+  3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+  2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+  4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+  1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+  2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+  2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+  1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+  2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+  2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+  3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+  2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+  3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+  1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+  1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+  2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+  2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+  2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+  1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+  2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+  2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+  2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+  1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+  2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+  2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+  2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+  4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+  3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+  3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+  4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+  3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+  2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+  2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+  2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+  2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+  3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+  2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+  2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+  2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+  3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+  2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+  3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+  3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+  2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+  3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+  2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+  3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+  3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+  2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+  2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+  4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+  2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+  2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+  3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+  2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+  3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+  3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+  3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+  2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+  2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+  3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+  2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+  3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+  3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+  3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+  3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+  3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+  2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+  1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+  2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+  1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+  2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+  1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+  2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+  2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+  2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+  1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+  2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+  2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+  3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+  1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+  2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+  2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+  3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+  2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+  1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+  2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+  2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+  2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+  2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+  2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+  3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+  3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+  3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+  2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+  2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+  3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+  2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+  2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+  2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+  3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+  3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+  2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+  2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+  2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+  2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+  2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+  2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+  2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+  2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+  2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+  4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+  2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+  2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+  3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+  3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+  1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+  3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+  2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+  3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+  3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+  1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+  2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+  2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+  3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+  2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+  2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+  3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+  3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+  3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+  2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+  2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+  3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+  2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+  3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+  2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+  4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+  3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+  3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+  2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+  2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+  3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+  3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+  2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+  3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+  3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+  3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+  3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+  2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+  2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+  2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+  2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+  1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+  2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+  2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+  2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+  2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+  1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+  1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+  1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+  2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+  2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+  2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+  3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+  3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+  3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+  1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+  2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+  2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+  2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+  2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+  3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+  2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+  3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+  3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+  2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+  1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+  2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+  269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+  2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+  1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+  2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+  2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+  2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+  269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+  2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+  2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+  2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+  1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+  2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+  3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+  2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+  2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+  1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+  2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+  3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+  2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+  3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+  2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+  1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+  2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+  3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+  1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+  3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+  2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+  2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+  4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+  2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+  2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+  2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+  2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+  2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+  3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+  3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+  2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+  2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+  3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+  3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+  3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+  2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+  2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+  2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+  3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+  2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+  4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+  2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+  3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+  3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+  2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+  2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+  1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+  1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+  269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+  1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+  1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+  1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+  2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+  2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+  269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+  1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+  470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+  1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+  1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+  2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+  2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+  470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+  1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+  1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+  1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+  1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+  1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+  2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+  1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+  2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+  2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+  1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+  1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+  2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+  2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+  1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+  1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+  1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+  2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+  1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+  1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+  1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+  2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+  2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+  2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+  1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+  1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+  1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+  2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+  2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+  1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+  470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+  1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+  2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+  470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+  2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+  1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+  2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+  2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+  1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+  1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+  1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+  1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+  1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+  2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+  2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+  1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+  2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+  2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+  2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+  1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+  1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+  1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+  1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+  2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+  2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+  2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+  1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+  2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+  2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+  1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+  1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+  1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+  470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+  1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+  1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+  1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+  470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+  1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+  1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+  470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+  2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+  2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+  2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+  2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+  2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+  2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+  1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+  3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+  1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+  2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+  3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+  2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+  2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+  3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+  3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+  3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+  3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+  2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+  2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+  3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+  2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+  2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+  2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+  2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+  3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+  3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+  2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+  2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+  4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+  3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+  3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+  3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+  2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+  2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+  3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+  2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+  2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+  3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+  3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+  3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+  2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+  2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+  2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+  4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+  2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+  2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+  2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+  2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+  2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+  2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+  3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+  1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+  1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+  1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+  2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+  2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+  2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+  1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+  2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+  2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+  2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+  1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+  2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+  3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+  3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+  3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+  3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+  2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+  2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+  3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+  2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+  1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+  2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+  2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+  2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+  1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+  2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+  1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+  1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+  3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+  2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+  2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+  2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+  2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+  2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+  3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+  3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+  2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+  3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+  3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+  2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+  2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+  2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+  3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+  3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+  2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+  2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+  3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+  4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+  2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+  3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+  2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+  3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+  3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+  3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+  3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+  2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+  2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+  2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+  3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+  2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+  2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+  2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+  2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+  2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+  2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+  3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+  2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+  2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+  2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+  2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+  3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+  3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+  2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+  2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+  3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+  3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+  3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+  2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+  2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+  4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+  2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+  2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+  2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+  3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+  2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+  3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+  2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+  3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+  4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+  3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+  3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+  2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+  2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+  2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+  3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+  2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+  2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+  2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+  4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+  2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+  2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+  2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+  2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+  2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+  2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+  2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+  2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+  2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+  2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+  2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+  1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+  2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+  3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+  1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+  3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+  2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+  2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+  1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+  2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+  2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+  2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+  2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+  2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+  2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+  3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+  2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+  2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+  2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+  2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+  2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+  2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+  2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+  2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+  2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+  2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+  2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+  2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+  3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+  2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+  2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+  2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+  3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+  3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+  1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+  1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+  2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+  2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+  2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+  3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+  2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+  1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+  2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+  4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+  1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+  3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+  2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+  2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+  3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+  2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+  2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+  2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+  2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+  2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+  2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+  3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+  2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+  3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+  2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+  3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+  2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+  2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+  2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+  2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+  2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+  2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+  2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+  2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+  3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+  2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+  2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+  2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+  2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+  1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+  2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+  2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+  1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+  1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+  2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+  1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+  1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+  2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+  1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+  2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+  2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+  2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+  2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+  2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+  2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+  1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+  2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+  3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+  3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+  3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+  3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+  3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+  2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+  3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+  2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+  3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+  3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+  2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+  3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+  3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+  2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+  2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+  3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+  2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+  1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+  2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+  2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+  2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+  1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+  2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+  1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+  2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+  1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+  2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+  3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+  3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+  2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+  3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+  2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+  2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+  3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+  2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+  2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+  3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+  3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+  3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+  2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+  3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+  3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+  2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+  2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+  2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+  3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+  2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+  3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+  2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+  2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+  2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+  3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+  2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+  2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+  3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+  3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+  3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+  2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+  3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+  2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+  2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+  2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+  1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+  1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+  2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+  2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+  1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+  2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+  1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+  2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+  1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+  1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+  470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+  1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+  1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+  1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+  3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+  470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+  1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+  1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+  1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+  1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+  1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+  2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+  1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+  1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+  2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+  269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+  1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+  1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+  2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+  1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+  269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+  1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+  2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+  1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+  1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+  1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+  2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+  1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+  1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+  1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+  1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+  1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+  2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+  1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+  1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+  470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+  1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+  3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+  470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+  2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+  1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+  2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+  2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+  1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+  1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+  1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+  1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+  1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+  2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+  1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+  2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+  1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+  2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+  1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+  1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+  1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+  1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+  2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+  2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+  2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+  1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+  2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+  2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+  1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+  1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+  1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+  470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+  269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+  1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+  1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+  470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+  1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+  470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+  1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+  1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+  1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+  3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+  2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+  3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+  3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+  3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+  1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+  1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+  2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+  537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+  2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+  1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+  2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+  2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+  2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+  537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+  1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+  2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+  2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+  2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+  1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+  2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+  2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+  2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+  1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+  2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+  2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+  2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+  2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+  2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+  3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+  2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+  3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+  2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+  2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+  1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+  1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+  3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+  2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+  1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+  2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+  3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+  1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+  2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+  2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+  3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+  3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+  2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+  2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+  2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+  2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+  3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+  2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+  2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+  2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+  3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+  3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+  3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+  2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+  2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+  2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+  2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+  2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+  2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+  3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+  2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+  3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+  3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+  2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+  2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+  1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+  1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+  537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+  2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+  1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+  1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+  2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+  2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+  537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+  2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+  2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+  2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+  1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+  2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+  2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+  3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+  3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+  1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+  2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+  1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+  2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+  1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+  2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+  2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+  3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+  3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+  1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+  2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+  2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+  2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+  1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+  2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+  2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+  3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+  2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+  1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+  1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+  1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+  2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+  2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+  1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+  1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+  2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+  2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+  1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+  2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+  2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+  2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+  2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+  2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+  2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+  2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+  3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+  2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+  2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+  2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+  2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+  1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+  2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+  3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+  3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+  2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+  1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+  3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+  2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+  2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+  2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+  3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+  2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+  3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+  2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+  2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+  3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+  2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+  4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+  3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+  3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+  2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+  3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+  3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+  3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+  1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+  1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+  2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+  1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+  1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+  1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+  2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+  2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+  1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+  2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+  1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+  2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+  2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+  2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+  2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+  2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+  2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+  1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+  1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+  2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+  2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+  2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+  2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+  2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+  2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+  3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+  1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+  2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+  2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+  1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+  1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+  2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+  2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+  2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+  3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+  1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+  1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+  2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+  2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+  2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+  1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+  2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+  2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+  2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+  1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+  2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+  2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+  2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+  2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+  2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+  1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+  2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+  2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+  1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+  2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+  2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+  2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+  2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+  1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+  2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+  2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+  2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+  1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+  2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+  2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+  2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+  1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+  2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+  2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+  2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+  2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+  1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+  2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+  2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+  4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+  2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+  2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+  2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+  2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+  2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+  2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+  1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+  1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+  1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+  1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+  1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+  1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+  2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+  2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+  1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+  2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+  1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+  2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+  2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+  2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+  2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+  3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+  4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+  1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+  2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+  1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+  2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+  2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+  2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+  2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+  3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+  2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+  1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+  2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+  2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+  1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+  2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+  2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+  2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+  2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+  2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+  1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+  1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+  2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+  2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+  336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+  1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+  2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+  2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+  2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+  336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+  2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+  2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+  2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+  2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+  2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+  1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+  2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+  4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+  1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+  2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+  2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+  2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+  2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+  2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+  3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+  2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+  2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+  2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+  2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+  3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+  2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+  2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+  2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+  3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+  2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+  2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+  2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+  2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+  2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+  2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+  2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+  2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+  2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+  3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+  3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+  2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+  1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+  1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+  1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+  336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+  1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+  1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+  2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+  2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+  336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+  2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+  1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+  2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+  3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+  2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+  1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+  1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+  3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+  1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+  2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+  2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+  1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+  2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+  2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+  2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+  2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+  2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+  1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+  3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+  2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+  2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+  2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+  2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+  2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+  2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+  2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+  2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+  2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+  2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+  2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+  2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+  2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+  2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+  3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+  3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+  2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+  2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+  2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+  2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+  2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+  1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+  1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+  1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+  3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+  1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+  1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+  2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+  1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+  2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+  1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+  2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+  537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+  2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+  537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+  2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+  2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+  2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+  2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+  1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+  2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+  2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+  2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+  1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+  2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+  3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+  2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+  3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+  2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+  2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+  2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+  2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+  2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+  1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+  1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+  1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+  2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+  1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+  1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+  537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+  2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+  537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+  3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+  2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+  2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+  3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+  2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+  2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+  2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+  2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+  2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+  2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+  3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+  2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+  2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+  2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+  2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+  3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+  1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+  1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+  3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+  2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+  3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+  2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+  2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+  2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+  3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+  2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+  2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+  3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+  2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+  3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+  2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+  2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+  2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+  4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+  3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+  3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+  2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+  3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+  2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+  3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+  2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+  2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+  2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+  1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+  1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+  2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+  2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+  2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+  2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+  2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+  1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+  2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+  1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+  1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+  2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+  2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+  2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+  2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+  2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+  2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+  2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+  1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+  1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+  1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+  1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+  2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+  2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+  1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+  1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+  2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+  2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+  1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+  1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+  1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+  2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+  2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+  1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+  1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+  2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+  1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+  1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+  2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+  2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+  2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+  3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+  2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+  2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+  2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+  2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+  2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+  3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+  3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+  2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+  3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+  2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+  3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+  2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+  2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+  2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+  2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+  2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+  2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+  3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+  2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+  2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+  2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+  1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+  1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+  3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+  3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+  3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+  2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+  2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+  3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+  3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+  2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+  2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+  3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+  2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+  2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+  3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+  2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+  2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+  2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+  2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+  2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+  2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+  3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+  2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+  2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+  2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+  3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+  4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+  2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+  2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+  2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+  2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+  2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+  3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+  2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+  2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+  1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+  1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+  1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+  1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+  2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+  2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+  2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+  1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+  2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+  2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+  2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+  1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+  1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+  2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+  2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+  2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+  1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+  2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+  1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+  1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+  1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+  2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+  1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+  2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+  3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+  2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+  2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+  2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+  2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+  1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+  2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+  2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+  2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+  2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+  2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+  1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+  2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+  2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+  1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+  2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+  3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+  2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+  2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+  2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+  2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+  1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+  2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+  1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+  2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+  2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+  2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+  2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+  2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+  2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+  2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+  2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+  2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+  2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+  2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+  2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+  3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+  2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+  1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+  2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+  2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+  1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+  2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+  2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+  3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+  2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+  2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+  2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+  2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+  2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+  2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+  2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+  2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+  2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+  2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+  2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+  2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+  2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+  2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+  2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+  2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+  2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+  2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+  2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+  2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+  2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+  2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+  1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+  1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+  2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+  1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+  2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+  2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+  2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+  1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+  1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+  1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+  1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+  1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+  1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+  1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+  1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+  2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+  1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+  1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+  2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+  1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+  1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+  1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+  537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+  1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+  1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+  1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+  2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+  1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+  537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+  1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+  2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+  1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+  1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+  1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+  2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+  1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+  1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+  1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+  1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+  1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+  2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+  336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+  1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+  1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+  2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+  2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+  336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+  2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+  1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+  1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+  2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+  1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+  1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+  1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+  1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+  1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+  1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+  2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+  1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+  1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+  1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+  1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+  537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+  1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+  537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+  2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+  2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+  2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+  1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+  1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+  2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+  1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+  1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+  1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+  1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+  1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+  2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+  3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+  1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+  1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+  2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+  1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+  1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+  1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+  1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+  537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+  336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+  1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+  1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+  537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+  1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+  537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+  2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+  2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+  2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+  3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+  2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+  3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+  3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+  3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+  2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+  2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+  2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+  1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+  2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+  2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+  3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+  2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+  1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+  2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+  3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+  2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+  2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+  2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+  3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+  2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+  3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+  2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+  3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+  2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+  2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+  3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+  3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+  3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+  3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+  3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+  2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+  2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+  2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+  2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+  3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+  2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+  2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+  3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+  3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+  2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+  2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+  1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+  2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+  2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+  2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+  3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+  3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+  2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+  1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+  2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+  3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+  1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+  2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+  2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+  2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+  3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+  2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+  1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+  3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+  3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+  4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+  3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+  3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+  3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+  3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+  3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+  4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+  2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+  1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+  1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+  2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+  2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+  2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+  2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+  1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+  2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+  2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+  3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+  2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+  2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+  3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+  3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+  3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+  2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+  3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+  2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+  2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+  2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+  3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+  3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+  3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+  3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+  2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+  2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+  2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+  3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+  1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+  2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+  3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+  3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+  3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+  1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+  2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+  2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+  2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+  2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+  2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+  2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+  2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+  3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+  2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+  3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+  3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+  3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+  3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+  3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+  2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+  3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+  3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+  2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+  1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+  2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+  2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+  2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+  1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+  2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+  2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+  2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+  1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+  2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+  2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+  4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+  3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+  2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+  2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+  4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+  3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+  2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+  2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+  3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+  3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+  2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+  3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+  3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+  3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+  3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+  2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+  1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+  2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+  2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+  1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+  1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+  2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+  2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+  2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+  1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+  2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+  2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+  2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+  3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+  2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+  3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+  2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+  3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+  2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+  3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+  3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+  3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+  3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+  2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+  3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+  3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+  3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+  2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+  3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+  3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+  2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+  2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+  3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+  3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+  3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+  3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+  2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+  2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+  3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+  3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+  2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+  2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+  2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+  3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+  2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+  2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+  2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+  2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+  2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+  2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+  2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+  2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+  2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+  3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+  2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+  2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+  2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+  2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+  2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+  2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+  3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+  2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+  3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+  2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+  1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+  2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+  2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+  2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+  1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+  2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+  2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+  2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+  1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+  2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+  3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+  3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+  2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+  4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+  3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+  3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+  3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+  2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+  1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+  2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+  2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+  2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+  1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+  2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+  2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+  2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+  1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+  3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+  2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+  2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+  3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+  3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+  2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+  3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+  3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+  2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+  2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+  3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+  2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+  2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+  3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+  3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+  3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+  3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+  2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+  3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+  2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+  3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+  2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+  3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+  2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+  3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+  3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+  2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+  3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+  3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+  3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+  2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+  2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+  3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+  3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+  3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+  2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+  2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+  2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+  2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+  3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+  2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+  2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+  2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+  3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+  2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+  2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+  2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+  2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+  2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+  2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+  4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+  3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+  2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+  2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+  2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+  2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+  2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+  2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+  2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+  1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+  4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+  2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+  1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+  3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+  3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+  3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+  3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+  3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+  3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+  3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+  3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+  3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+  2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+  2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+  2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+  2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+  2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+  1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+  2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+  2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+  1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+  2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+  1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+  2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+  3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+  2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+  2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+  2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+  3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+  1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+  2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+  2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+  2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+  2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+  2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+  2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+  3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+  3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+  2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+  3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+  3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+  2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+  2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+  2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+  3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+  2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+  3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+  2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+  2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+  3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+  3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+  2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+  2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+  3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+  3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+  3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+  2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+  1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+  2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+  2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+  2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+  161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+  1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+  2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+  2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+  161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+  2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+  2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+  3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+  2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+  2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+  1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+  1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+  2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+  1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+  2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+  3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+  2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+  2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+  2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+  2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+  1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+  2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+  1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+  2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+  3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+  3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+  3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+  2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+  2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+  2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+  2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+  2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+  1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+  1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+  2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+  2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+  161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+  1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+  1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+  2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+  161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+  2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+  1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+  2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+  2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+  2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+  3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+  2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+  2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+  1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+  2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+  2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+  2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+  2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+  3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+  2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+  3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+  2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+  2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+  2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+  3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+  2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+  1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+  2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+  3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+  2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+  4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+  1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+  2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+  3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+  2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+  2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+  2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+  2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+  2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+  2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+  2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+  2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+  3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+  2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+  2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+  2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+  1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+  2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+  2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+  1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+  1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+  2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+  2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+  2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+  1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+  2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+  2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+  2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+  1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+  1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+  2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+  2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+  1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+  1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+  3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+  2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+  27705344U, // <4,5,6,7>: Cost 0 copy RHS
+  27705344U, // <4,5,6,u>: Cost 0 copy RHS
+  2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+  2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+  2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+  2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+  2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+  2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+  2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+  2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+  2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+  1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+  1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+  2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+  1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+  1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+  1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+  2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+  27705344U, // <4,5,u,7>: Cost 0 copy RHS
+  27705344U, // <4,5,u,u>: Cost 0 copy RHS
+  2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+  1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+  1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+  3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+  2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+  3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+  2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+  4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+  1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+  2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+  2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+  2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+  2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+  3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+  2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+  3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+  4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+  2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+  2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+  2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+  2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+  2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+  2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+  2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+  2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+  2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+  2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+  2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+  2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+  3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+  2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+  2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+  2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+  3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+  2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+  2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+  2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+  2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+  2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+  2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+  2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+  1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+  1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+  2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+  1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+  2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+  2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+  2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+  3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+  2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+  2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+  2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+  2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+  2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+  1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+  2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+  2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+  2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+  1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+  2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+  2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+  2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+  1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+  2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+  2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+  3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+  2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+  2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+  2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+  3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+  2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+  2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+  1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+  1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+  1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+  2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+  1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+  1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+  1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+  2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+  1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+  3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+  2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+  2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+  3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+  3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+  2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+  3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+  3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+  2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+  2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+  3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+  2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+  3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+  3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+  3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+  3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+  3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+  2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+  3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+  3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+  3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+  2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+  3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+  2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+  3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+  3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+  2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+  3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+  3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+  3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+  3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+  3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+  2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+  2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+  3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+  2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+  2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+  3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+  3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+  3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+  2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+  2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+  2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+  3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+  2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+  2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+  3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+  2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+  3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+  2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+  2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+  2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+  2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+  2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+  1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+  3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+  2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+  2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+  1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+  1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+  2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+  3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+  1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+  3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+  3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+  3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+  3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+  3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+  2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+  3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+  2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+  2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+  1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+  2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+  2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+  2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+  1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+  1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+  2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+  3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+  1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+  2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+  1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+  1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+  2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+  2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+  2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+  2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+  2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+  1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+  2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+  2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+  1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+  2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+  2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+  3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+  2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+  1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+  2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+  2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+  2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+  1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+  2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+  2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+  2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+  2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+  1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+  2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+  2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+  2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+  2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+  2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+  2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+  2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+  2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+  2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+  1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+  2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+  2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+  2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+  161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+  1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+  1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+  2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+  161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+  1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+  1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+  2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+  3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+  1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+  1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+  1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+  3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+  1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+  1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+  2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+  1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+  1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+  1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+  1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+  1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+  27705344U, // <4,u,6,7>: Cost 0 copy RHS
+  27705344U, // <4,u,6,u>: Cost 0 copy RHS
+  2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+  2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+  2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+  2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+  2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+  2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+  2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+  2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+  2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+  1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+  1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+  1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+  161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+  1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+  1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+  27705344U, // <4,u,u,7>: Cost 0 copy RHS
+  27705344U, // <4,u,u,u>: Cost 0 copy RHS
+  2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+  2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+  2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+  3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+  2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+  3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+  3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+  3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+  2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+  2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+  2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+  1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+  2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+  2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+  3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+  3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+  1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+  2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+  2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+  2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+  2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+  2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+  2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+  2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+  2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+  2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+  3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+  3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+  2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+  3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+  2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+  3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+  3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+  3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+  2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+  2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+  1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+  2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+  3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+  2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+  2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+  3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+  2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+  1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+  3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+  2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+  3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+  3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+  3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+  2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+  3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+  3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+  2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+  4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+  2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+  3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+  3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+  3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+  3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+  3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+  2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+  2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+  2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+  2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+  2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+  3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+  2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+  2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+  2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+  4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+  2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+  2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+  1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+  1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+  2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+  2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+  2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+  2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+  1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+  2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+  1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+  2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+  2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+  2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+  2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+  3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+  3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+  1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+  2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+  2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+  2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+  2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+  2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+  2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+  3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+  3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+  2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+  3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+  3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+  2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+  2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+  2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+  2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+  2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+  3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+  2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+  2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+  2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+  3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+  2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+  2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+  2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+  3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+  2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+  2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+  1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+  2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+  2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+  2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+  2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+  1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+  2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+  3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+  1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+  2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+  2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+  3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+  2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+  2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+  2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+  2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+  2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+  2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+  2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+  3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+  2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+  3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+  2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+  2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+  2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+  2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+  2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+  2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+  2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+  2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+  2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+  2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+  2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+  4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+  2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+  2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+  1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+  1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+  2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+  2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+  2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+  1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+  2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+  2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+  2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+  3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+  2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+  2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+  3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+  3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+  3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+  3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+  3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+  2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+  2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+  3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+  3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+  2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+  3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+  3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+  3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+  3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+  2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+  3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+  3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+  2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+  2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+  2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+  3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+  3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+  3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+  2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+  2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+  2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+  2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+  3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+  1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+  2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+  3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+  3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+  1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+  2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+  3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+  2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+  2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+  2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+  2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+  2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+  3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+  2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+  2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+  3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+  3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+  2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+  2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+  2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+  3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+  3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+  2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+  3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+  3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+  2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+  2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+  2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+  3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+  3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+  3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+  2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+  2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+  2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+  2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+  2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+  2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+  4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+  3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+  3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+  2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+  2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+  2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+  2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+  2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+  1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+  2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+  2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+  3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+  1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+  3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+  2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+  3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+  3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+  2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+  3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+  3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+  2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+  2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+  3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+  3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+  2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+  2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+  3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+  2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+  3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+  2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+  2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+  3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+  3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+  3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+  2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+  2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+  2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+  3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+  3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+  2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+  3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+  2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+  3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+  2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+  2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+  2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+  3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+  3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+  2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+  2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+  3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+  2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+  2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+  2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+  2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+  3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+  2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+  2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+  2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+  2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+  2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+  2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+  2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+  2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+  3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+  2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+  2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+  2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+  3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+  2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+  2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+  2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+  3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+  4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+  2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+  2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+  1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+  1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+  2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+  2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+  1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+  2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+  2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+  3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+  1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+  1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+  1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+  2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+  2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+  1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+  2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+  2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+  2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+  1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+  3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+  2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+  3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+  3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+  2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+  2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+  2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+  3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+  2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+  2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+  3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+  3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+  3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+  3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+  2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+  3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+  3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+  2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+  3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+  3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+  3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+  2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+  3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+  2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+  3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+  3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+  2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+  3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+  3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+  2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+  3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+  2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+  3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+  2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+  3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+  2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+  2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+  2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+  3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+  3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+  2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+  1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+  2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+  2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+  1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+  2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+  2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+  2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+  3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+  2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+  2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+  1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+  3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+  1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+  2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+  2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+  3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+  2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+  2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+  2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+  3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+  2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+  2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+  2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+  2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+  2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+  2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+  2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+  3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+  94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+  2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+  94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+  2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+  2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+  2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+  2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+  2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+  1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+  94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+  2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+  94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+  2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+  1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+  2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+  3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+  2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+  2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+  3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+  4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+  1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+  2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+  2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+  2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+  2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+  2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+  2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+  3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+  2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+  2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+  3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+  3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+  2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+  2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+  3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+  2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+  2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+  4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+  2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+  2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+  3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+  3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+  2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+  2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+  2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+  3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+  3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+  2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+  2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+  2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+  3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+  3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+  1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+  1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+  2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+  2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+  1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+  1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+  2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+  2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+  2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+  1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+  229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+  2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+  2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+  229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+  2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+  3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+  2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+  3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+  2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+  2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+  2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+  2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+  2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+  2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+  3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+  2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+  2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+  2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+  3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+  4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+  2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+  2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+  1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+  1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+  2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+  2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+  1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+  229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+  2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+  2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+  229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+  2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+  1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+  2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+  2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+  2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+  3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+  3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+  4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+  1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+  2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+  2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+  2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+  2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+  2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+  2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+  3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+  2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+  2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+  3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+  3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+  2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+  2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+  2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+  2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+  2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+  2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+  2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+  2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+  3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+  3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+  2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+  1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+  2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+  3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+  4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+  1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+  2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+  3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+  2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+  2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+  2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+  1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+  2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+  2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+  1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+  2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+  2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+  3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+  3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+  2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+  2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+  2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+  2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+  2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+  2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+  3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+  2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+  2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+  2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+  2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+  2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+  2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+  2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+  430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+  1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+  1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+  430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+  1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+  1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+  1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+  430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+  430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+  1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+  1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+  430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+  1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+  1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+  1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+  430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+  2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+  1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+  2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+  2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+  2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+  3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+  2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+  1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+  2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+  2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+  1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+  2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+  2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+  2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+  3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+  1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+  2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+  3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+  2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+  2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+  2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+  2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+  2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+  3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+  2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+  2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+  2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+  3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+  2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+  2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+  2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+  3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+  2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+  2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+  2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+  2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+  3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+  2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+  2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+  1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+  2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+  2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+  1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+  2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+  2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+  2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+  2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+  2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+  2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+  2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+  1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+  1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+  2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+  3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+  2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+  2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+  2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+  2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+  2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+  2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+  2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+  1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+  2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+  2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+  2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+  1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+  1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+  2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+  2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+  1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+  1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+  1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+  1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+  1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+  1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+  2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+  1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+  1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+  1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+  2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+  2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+  2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+  2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+  2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+  2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+  1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+  2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+  2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+  1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+  2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+  2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+  2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+  2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+  1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+  2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+  2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+  2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+  2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+  2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+  2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+  2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+  2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+  2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+  2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+  2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+  2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+  2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+  1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+  2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+  2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+  2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+  1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+  1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+  1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+  2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+  2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+  1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+  1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+  2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+  2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+  1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+  1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+  2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+  2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+  2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+  1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+  229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+  1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+  1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+  229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+  2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+  2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+  2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+  2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+  2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+  2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+  2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+  2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+  2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+  430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+  1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+  1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+  430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+  1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+  118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+  2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+  430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+  430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+  1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+  1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+  430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+  229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+  118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+  1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+  430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+  2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+  2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+  2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+  3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+  2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+  3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+  3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+  3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+  2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+  2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+  3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+  1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+  2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+  2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+  2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+  2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+  1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+  2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+  2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+  3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+  2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+  1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+  3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+  2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+  3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+  1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+  3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+  2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+  2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+  3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+  2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+  3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+  3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+  3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+  2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+  2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+  2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+  1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+  3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+  2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+  2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+  2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+  2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+  1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+  3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+  2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+  3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+  3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+  2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+  3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+  2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+  2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+  2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+  2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+  2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+  3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+  3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+  2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+  3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+  2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+  2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+  2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+  2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+  2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+  2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+  2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+  2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+  4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+  3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+  4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+  2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+  2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+  2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+  1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+  1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+  2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+  2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+  2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+  1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+  2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+  2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+  3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+  2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+  2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+  3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+  2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+  2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+  2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+  3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+  2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+  3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+  2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+  2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+  3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+  3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+  3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+  2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+  2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+  3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+  3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+  2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+  2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+  3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+  2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+  3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+  2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+  2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+  2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+  2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+  3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+  2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+  2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+  2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+  3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+  2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+  2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+  2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+  3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+  2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+  3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+  2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+  3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+  2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+  2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+  2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+  3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+  3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+  2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+  2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+  3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+  2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+  3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+  2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+  2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+  3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+  3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+  3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+  2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+  3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+  2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+  3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+  3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+  2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+  2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+  2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+  3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+  2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+  2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+  3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+  4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+  2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+  2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+  2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+  2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+  2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+  2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+  2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+  2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+  2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+  2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+  2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+  1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+  2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+  3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+  2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+  3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+  2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+  3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+  1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+  2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+  2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+  2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+  2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+  3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+  2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+  2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+  3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+  2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+  3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+  3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+  2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+  2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+  2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+  3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+  2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+  3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+  2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+  2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+  3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+  2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+  2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+  2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+  2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+  2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+  2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+  2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+  1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+  3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+  2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+  2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+  2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+  1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+  2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+  3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+  1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+  3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+  2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+  3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+  2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+  2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+  2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+  2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+  2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+  2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+  2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+  2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+  2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+  2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+  2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+  2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+  2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+  2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+  2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+  2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+  2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+  2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+  1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+  2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+  3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+  2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+  2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+  1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+  1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+  1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+  2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+  1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+  2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+  1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+  2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+  2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+  1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+  3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+  2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+  2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+  3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+  2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+  2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+  3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+  2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+  2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+  3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+  3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+  3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+  3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+  2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+  3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+  3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+  3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+  2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+  3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+  2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+  3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+  3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+  3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+  3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+  2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+  3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+  2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+  3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+  3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+  2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+  2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+  2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+  3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+  3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+  3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+  2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+  2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+  2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+  2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+  2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+  2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+  1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+  2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+  2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+  1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+  3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+  3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+  3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+  3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+  2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+  3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+  3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+  3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+  2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+  2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+  2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+  2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+  3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+  2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+  3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+  2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+  3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+  2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+  1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+  2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+  1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+  2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+  1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+  2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+  2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+  2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+  1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+  1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+  2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+  1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+  2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+  1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+  1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+  2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+  2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+  1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+  3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+  2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+  2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+  3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+  2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+  2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+  2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+  3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+  2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+  2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+  3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+  3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+  2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+  3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+  3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+  2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+  3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+  2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+  2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+  3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+  3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+  3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+  2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+  2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+  2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+  3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+  2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+  3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+  3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+  3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+  3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+  2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+  3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+  2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+  3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+  2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+  2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+  3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+  2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+  3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+  2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+  2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+  1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+  2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+  1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+  2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+  3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+  2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+  2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+  2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+  2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+  1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+  2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+  1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+  1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+  2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+  2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+  2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+  1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+  2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+  2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+  2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+  1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+  2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+  2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+  3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+  2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+  2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+  2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+  2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+  4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+  2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+  1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+  2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+  2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+  2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+  1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+  2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+  1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+  2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+  1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+  3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+  2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+  3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+  3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+  3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+  4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+  3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+  1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+  1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+  2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+  3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+  3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+  3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+  3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+  3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+  3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+  2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+  2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+  2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+  3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+  3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+  3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+  2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+  3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+  2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+  3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+  2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+  3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+  3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+  3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+  3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+  2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+  3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+  3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+  2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+  2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+  2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+  3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+  3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+  2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+  2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+  2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+  3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+  1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+  1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+  2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+  3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+  3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+  3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+  2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+  2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+  2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+  2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+  2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+  2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+  3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+  3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+  2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+  2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+  3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+  2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+  2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+  2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+  2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+  2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+  2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+  3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+  2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+  2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+  2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+  3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+  2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+  2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+  2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+  2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+  2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+  2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+  2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+  2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+  1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+  1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+  2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+  1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+  2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+  3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+  2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+  3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+  2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+  4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+  1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+  2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+  2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+  2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+  2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+  3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+  2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+  2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+  3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+  2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+  3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+  3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+  2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+  2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+  3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+  3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+  2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+  2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+  2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+  2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+  3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+  3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+  2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+  2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+  3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+  2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+  4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+  2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+  2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+  3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+  3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+  3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+  1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+  1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+  2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+  2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+  1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+  3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+  2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+  3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+  3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+  2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+  2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+  2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+  2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+  2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+  1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+  2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+  2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+  2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+  1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+  2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+  296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+  2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+  296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+  2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+  3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+  2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+  2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+  2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+  2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+  2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+  1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+  1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+  1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+  1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+  2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+  2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+  1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+  1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+  296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+  1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+  296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+  1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+  497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+  1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+  2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+  1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+  1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+  2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+  2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+  497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+  1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+  1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+  1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+  1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+  2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+  1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+  2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+  2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+  1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+  2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+  2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+  1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+  1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+  2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+  2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+  1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+  2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+  1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+  1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+  2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+  2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+  1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+  1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+  2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+  2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+  2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+  1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+  1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+  2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+  2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+  2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+  1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+  497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+  1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+  497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+  2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+  1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+  2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+  1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+  1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+  1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+  1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+  1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+  2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+  2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+  1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+  2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+  2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+  1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+  1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+  1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+  1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+  2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+  2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+  2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+  1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+  2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+  1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+  1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+  1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+  1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+  497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+  1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+  1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+  1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+  497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+  1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+  1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+  497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+  1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+  497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+  1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+  2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+  1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+  1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+  2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+  1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+  497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+  1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+  1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+  1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+  2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+  1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+  2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+  2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+  1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+  1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+  2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+  1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+  1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+  1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+  2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+  1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+  2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+  1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+  1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+  2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+  2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+  1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+  1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+  2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+  2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+  2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+  1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+  1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+  2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+  1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+  2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+  1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+  497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+  1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+  497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+  2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+  1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+  2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+  1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+  1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+  1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+  1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+  1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+  1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+  2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+  1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+  1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+  2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+  296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+  1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+  296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+  1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+  2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+  1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+  1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+  1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+  2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+  1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+  1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+  1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+  1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+  497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+  1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+  1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+  497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+  296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+  1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+  497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+  1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+  1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+  1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+  3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+  2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+  2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+  2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+  3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+  1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+  1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+  2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+  564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+  2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+  1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+  2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+  1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+  2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+  564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+  1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+  2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+  2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+  2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+  1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+  2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+  2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+  2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+  1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+  2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+  2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+  2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+  2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+  2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+  2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+  2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+  3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+  2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+  2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+  1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+  1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+  3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+  2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+  1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+  2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+  3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+  1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+  2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+  2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+  2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+  2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+  2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+  2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+  1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+  2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+  1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+  2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+  2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+  2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+  2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+  3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+  2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+  2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+  2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+  2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+  2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+  2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+  3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+  3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+  2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+  2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+  2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+  2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+  2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+  1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+  1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+  564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+  2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+  1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+  1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+  1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+  2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+  564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+  2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+  2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+  2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+  1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+  2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+  2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+  2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+  3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+  1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+  2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+  1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+  3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+  1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+  2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+  2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+  2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+  3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+  1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+  2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+  2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+  3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+  1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+  2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+  2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+  2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+  2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+  1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+  2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+  1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+  2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+  2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+  2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+  1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+  2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+  2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+  1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+  2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+  2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+  2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+  2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+  3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+  2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+  2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+  3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+  2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+  2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+  2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+  3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+  1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+  2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+  3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+  2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+  2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+  1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+  3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+  2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+  2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+  2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+  3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+  2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+  3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+  2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+  2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+  2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+  2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+  3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+  3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+  2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+  3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+  3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+  2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+  3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+  2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+  1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+  2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+  1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+  2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+  1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+  2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+  2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+  1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+  2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+  2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+  2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+  2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+  2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+  2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+  2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+  3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+  2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+  2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+  3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+  3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+  2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+  2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+  3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+  2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+  2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+  2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+  2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+  2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+  1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+  1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+  2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+  2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+  2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+  3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+  1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+  1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+  2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+  2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+  2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+  1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+  2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+  2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+  2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+  1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+  2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+  2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+  2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+  2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+  2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+  2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+  2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+  3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+  2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+  2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+  3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+  2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+  2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+  2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+  3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+  2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+  3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+  2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+  2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+  2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+  2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+  1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+  2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+  2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+  2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+  3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+  1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+  2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+  3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+  3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+  2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+  2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+  3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+  4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+  2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+  2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+  1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+  2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+  1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+  1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+  1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+  2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+  2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+  2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+  1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+  2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+  1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+  2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+  2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+  2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+  2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+  2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+  3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+  1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+  2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+  2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+  2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+  2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+  2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+  2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+  3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+  2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+  2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+  2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+  2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+  2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+  2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+  2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+  2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+  2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+  2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+  2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+  2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+  2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+  2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+  1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+  2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+  2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+  2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+  2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+  1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+  2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+  2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+  2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+  2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+  2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+  1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+  2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+  3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+  1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+  2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+  1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+  2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+  2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+  2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+  2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+  2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+  2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+  1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+  2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+  2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+  1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+  2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+  2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+  3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+  2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+  2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+  1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+  2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+  2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+  2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+  2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+  2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+  2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+  2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+  2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+  2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+  2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+  1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+  1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+  1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+  2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+  1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+  2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+  2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+  1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+  2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+  1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+  2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+  3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+  2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+  1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+  1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+  3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+  1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+  2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+  2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+  2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+  2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+  2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+  2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+  2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+  3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+  2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+  3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+  3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+  2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+  2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+  3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+  2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+  2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+  3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+  2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+  2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+  3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+  3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+  2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+  2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+  2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+  2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+  3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+  2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+  2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+  3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+  3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+  3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+  1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+  1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+  1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+  3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+  1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+  1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+  2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+  2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+  2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+  1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+  2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+  564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+  2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+  564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+  2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+  3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+  2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+  2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+  1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+  2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+  2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+  2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+  1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+  2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+  3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+  3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+  3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+  2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+  2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+  2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+  2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+  2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+  1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+  1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+  2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+  2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+  1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+  1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+  564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+  2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+  564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+  2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+  2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+  2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+  3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+  2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+  2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+  2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+  2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+  2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+  2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+  3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+  3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+  2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+  2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+  2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+  2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+  1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+  1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+  3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+  3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+  3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+  2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+  3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+  2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+  3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+  2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+  2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+  3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+  2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+  3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+  3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+  3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+  2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+  3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+  2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+  2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+  2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+  3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+  3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+  3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+  2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+  2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+  2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+  1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+  1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+  2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+  2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+  3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+  3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+  2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+  1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+  2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+  1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+  1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+  2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+  2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+  3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+  2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+  2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+  2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+  2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+  1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+  1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+  1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+  2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+  2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+  2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+  1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+  1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+  2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+  2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+  1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+  1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+  2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+  2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+  2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+  1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+  1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+  2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+  1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+  1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+  2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+  1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+  2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+  2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+  2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+  2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+  2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+  2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+  1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+  1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+  2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+  2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+  2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+  2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+  2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+  2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+  2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+  1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+  2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+  2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+  2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+  2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+  2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+  2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+  2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+  1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+  1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+  2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+  2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+  2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+  2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+  2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+  2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+  3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+  2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+  2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+  2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+  2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+  2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+  2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+  2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+  1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+  2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+  2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+  1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+  2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+  2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+  2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+  3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+  1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+  2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+  2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+  2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+  1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+  2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+  2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+  2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+  3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+  2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+  2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+  1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+  1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+  1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+  1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+  2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+  2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+  2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+  1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+  2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+  2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+  2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+  1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+  1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+  1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+  2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+  2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+  1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+  1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+  1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+  1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+  1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+  2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+  1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+  2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+  3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+  2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+  2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+  2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+  2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+  1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+  2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+  2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+  2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+  2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+  2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+  2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+  2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+  2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+  2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+  3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+  3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+  2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+  2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+  3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+  3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+  2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+  2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+  2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+  2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+  3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+  3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+  2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+  2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+  2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+  2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+  2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+  2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+  2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+  3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+  3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+  3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+  2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+  1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+  2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+  2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+  1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+  2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+  2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+  3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+  2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+  2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+  1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+  2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+  2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+  1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+  2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+  2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+  2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+  2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+  2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+  2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+  1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+  2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+  1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+  1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+  2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+  2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+  2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+  1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+  2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+  2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+  363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+  363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+  1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+  1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+  2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+  2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+  1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+  1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+  1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+  363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+  363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+  1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+  1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+  1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+  1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+  2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+  1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+  1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+  2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+  1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+  1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+  1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+  564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+  1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+  1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+  2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+  1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+  1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+  564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+  1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+  2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+  1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+  1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+  1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+  2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+  2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+  1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+  1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+  1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+  1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+  2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+  1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+  1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+  1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+  2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+  2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+  1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+  2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+  1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+  1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+  2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+  1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+  1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+  1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+  1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+  1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+  1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+  1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+  2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+  1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+  1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+  1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+  564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+  1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+  564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+  2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+  2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+  1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+  1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+  1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+  2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+  1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+  1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+  1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+  1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+  2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+  2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+  2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+  1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+  1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+  2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+  363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+  363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+  1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+  1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+  564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+  1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+  1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+  1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+  564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+  363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+  564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+  135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+  1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+  1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+  2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+  1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+  2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+  2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+  2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+  135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+  1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+  1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+  537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+  2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+  1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+  2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+  1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+  2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+  537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+  1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+  2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+  1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+  2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+  1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+  2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+  2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+  2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+  1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+  2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+  2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+  72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+  2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+  2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+  2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+  2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+  2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+  73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+  2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+  1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+  1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+  3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+  2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+  1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+  2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+  2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+  1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+  2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+  1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+  2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+  2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+  2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+  2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+  1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+  2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+  1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+  2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+  2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+  1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+  2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+  2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+  2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+  2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+  2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+  1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+  2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+  2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+  2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+  2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+  2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+  2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+  2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+  2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+  2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+  135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+  1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+  537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+  2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+  1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+  1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+  1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+  2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+  537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+  1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+  1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+  2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+  1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+  2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+  2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+  2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+  2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+  1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+  1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+  202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+  2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+  1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+  1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+  2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+  2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+  2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+  202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+  1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+  2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+  2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+  835584U, // <u,1,2,3>: Cost 0 copy LHS
+  1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+  2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+  2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+  1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+  835584U, // <u,1,2,u>: Cost 0 copy LHS
+  1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+  1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+  2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+  2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+  1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+  1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+  2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+  2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+  1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+  1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+  2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+  2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+  1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+  2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+  1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+  2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+  2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+  1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+  1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+  2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+  2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+  1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+  1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+  2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+  2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+  2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+  1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+  2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+  2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+  2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+  2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+  2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+  2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+  2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+  1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+  1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+  2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+  2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+  2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+  2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+  2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+  2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+  2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+  2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+  2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+  1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+  202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+  2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+  835584U, // <u,1,u,3>: Cost 0 copy LHS
+  1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+  1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+  2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+  1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+  835584U, // <u,1,u,u>: Cost 0 copy LHS
+  1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+  1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+  1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+  2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+  2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+  2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+  2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+  2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+  1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+  1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+  2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+  2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+  2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+  2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+  2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+  2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+  2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+  1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+  1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+  2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+  269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+  1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+  1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+  2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+  2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+  2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+  269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+  408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+  1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+  1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+  408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+  1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+  1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+  408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+  1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+  2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+  2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+  2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+  2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+  1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+  1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+  2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+  1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+  2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+  2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+  2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+  2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+  1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+  2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+  2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+  2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+  1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+  1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+  2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+  2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+  1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+  1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+  2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+  2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+  2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+  1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+  1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+  2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+  2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+  1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+  2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+  2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+  2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+  2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+  1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+  408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+  1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+  269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+  1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+  408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+  1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+  1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+  1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+  408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+  1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+  471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+  1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+  1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+  2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+  2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+  471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+  1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+  1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+  1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+  1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+  1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+  2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+  1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+  1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+  1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+  1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+  1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+  1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+  2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+  1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+  1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+  1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+  1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+  1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+  336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+  1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+  2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+  2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+  2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+  336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+  1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+  1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+  2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+  2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+  1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+  471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+  1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+  2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+  471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+  2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+  1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+  2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+  2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+  1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+  1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+  1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+  1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+  1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+  2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+  2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+  1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+  2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+  2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+  1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+  1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+  1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+  1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+  1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+  1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+  1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+  2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+  1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+  2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+  2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+  1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+  1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+  1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+  471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+  1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+  336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+  1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+  471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+  1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+  1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+  471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+  2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+  1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+  2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+  2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+  2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+  1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+  1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+  3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+  1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+  2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+  2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+  1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+  2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+  2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+  1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+  2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+  2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+  1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+  2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+  2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+  2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+  2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+  2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+  2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+  1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+  2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+  1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+  2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+  2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+  2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+  2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+  2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+  2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+  2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+  3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+  2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+  1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+  2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+  2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+  2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+  161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+  1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+  1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+  2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+  161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+  1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+  2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+  1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+  2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+  1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+  1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+  537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+  2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+  537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+  1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+  2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+  2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+  2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+  1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+  2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+  1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+  2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+  1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+  2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+  2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+  2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+  2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+  2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+  2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+  96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+  2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+  96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+  1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+  1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+  1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+  2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+  161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+  1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+  537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+  2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+  537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+  2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+  1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+  2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+  2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+  1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+  2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+  2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+  1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+  1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+  1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+  2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+  2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+  2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+  1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+  2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+  2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+  1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+  1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+  2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+  2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+  2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+  1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+  2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+  2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+  2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+  3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+  1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+  2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+  2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+  2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+  2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+  2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+  3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+  2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+  2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+  2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+  2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+  2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+  2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+  2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+  1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+  1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+  2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+  1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+  1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+  1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+  2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+  2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+  2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+  1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+  229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+  2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+  1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+  229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+  1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+  2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+  2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+  1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+  1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+  2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+  2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+  27705344U, // <u,5,6,7>: Cost 0 copy RHS
+  27705344U, // <u,5,6,u>: Cost 0 copy RHS
+  1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+  1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+  2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+  2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+  1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+  1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+  2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+  2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+  1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+  1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+  1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+  2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+  1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+  1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+  229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+  2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+  27705344U, // <u,5,u,7>: Cost 0 copy RHS
+  27705344U, // <u,5,u,u>: Cost 0 copy RHS
+  2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+  1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+  1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+  2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+  1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+  2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+  2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+  2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+  1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+  1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+  2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+  2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+  2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+  2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+  2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+  2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+  2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+  1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+  1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+  2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+  2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+  2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+  1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+  2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+  2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+  1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+  1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+  2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+  2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+  2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+  2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+  1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+  2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+  3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+  1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+  1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+  2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+  2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+  2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+  2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+  1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+  1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+  1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+  2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+  1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+  2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+  2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+  2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+  2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+  1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+  2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+  2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+  2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+  1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+  1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+  2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+  2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+  2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+  1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+  2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+  296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+  1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+  296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+  432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+  1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+  1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+  432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+  1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+  1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+  1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+  432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+  432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+  1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+  1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+  432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+  1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+  296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+  1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+  432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+  1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+  497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+  1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+  2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+  1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+  1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+  2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+  2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+  497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+  1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+  1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+  1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+  1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+  2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+  1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+  2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+  2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+  1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+  2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+  1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+  1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+  1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+  2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+  2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+  1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+  2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+  1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+  1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+  2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+  2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+  1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+  1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+  1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+  1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+  2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+  1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+  1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+  2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+  2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+  2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+  1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+  497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+  1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+  497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+  2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+  1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+  2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+  1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+  1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+  1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+  1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+  1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+  1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+  2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+  1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+  1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+  1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+  1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+  1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+  1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+  1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+  2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+  2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+  2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+  1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+  1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+  1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+  363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+  363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+  1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+  497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+  1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+  1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+  1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+  497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+  1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+  363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+  497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+  135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+  471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+  1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+  1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+  1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+  1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+  471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+  1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+  202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+  537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+  1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+  1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+  1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+  537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+  1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+  1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+  269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+  835584U, // <u,u,2,3>: Cost 0 copy LHS
+  1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+  2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+  1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+  835584U, // <u,u,2,u>: Cost 0 copy LHS
+  408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+  1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+  336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+  408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+  1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+  1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+  408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+  1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+  1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+  1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+  1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+  161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+  471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+  1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+  471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+  1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+  1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+  1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+  1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+  229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+  537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+  1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+  537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+  1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+  2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+  1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+  1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+  1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+  296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+  27705344U, // <u,u,6,7>: Cost 0 copy RHS
+  27705344U, // <u,u,6,u>: Cost 0 copy RHS
+  432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+  1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+  1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+  1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+  432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+  1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+  120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+  363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+  432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+  408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+  471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+  537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+  835584U, // <u,u,u,3>: Cost 0 copy LHS
+  408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+  471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+  537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+  27705344U, // <u,u,u,7>: Cost 0 copy RHS
+  835584U, // <u,u,u,u>: Cost 0 copy LHS
+  0
+};

Added: llvm/trunk/lib/Target/ARM64/ARM64PromoteConstant.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64PromoteConstant.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64PromoteConstant.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64PromoteConstant.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,588 @@
+
+//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM64PromoteConstant pass which promotes constant
+// to global variables when this is likely to be more efficient.
+// Currently only types related to constant vector (i.e., constant vector, array
+// of constant vectors, constant structure with a constant vector field, etc.)
+// are promoted to global variables.
+// Indeed, constant vector are likely to be lowered in target constant pool
+// during instruction selection.
+// Therefore, the access will remain the same (memory load), but the structures
+// types are not split into different constant pool accesses for each field.
+// The bonus side effect is that created globals may be merged by the global
+// merge pass.
+//
+// FIXME: This pass may be useful for other targets too.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm64-promote-const"
+#include "ARM64.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("arm64-stress-promote-const", cl::Hidden,
+                            cl::desc("Promote all vector constants"));
+
+STATISTIC(NumPromoted, "Number of promoted constants");
+STATISTIC(NumPromotedUses, "Number of promoted constants uses");
+
+//===----------------------------------------------------------------------===//
+//                       ARM64PromoteConstant
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Promotes interesting constant into global variables.
+/// The motivating example is:
+/// static const uint16_t TableA[32] = {
+///   41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768,
+///   31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215,
+///   25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846,
+///   21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725,
+/// };
+///
+/// uint8x16x4_t LoadStatic(void) {
+///   uint8x16x4_t ret;
+///   ret.val[0] = vld1q_u16(TableA +  0);
+///   ret.val[1] = vld1q_u16(TableA +  8);
+///   ret.val[2] = vld1q_u16(TableA + 16);
+///   ret.val[3] = vld1q_u16(TableA + 24);
+///   return ret;
+/// }
+///
+/// The constants in that example are folded into the uses. Thus, 4 different
+/// constants are created.
+/// As their type is vector the cheapest way to create them is to load them
+/// for the memory.
+/// Therefore the final assembly final has 4 different load.
+/// With this pass enabled, only one load is issued for the constants.
+class ARM64PromoteConstant : public ModulePass {
+
+public:
+  static char ID;
+  ARM64PromoteConstant() : ModulePass(ID) {}
+
+  virtual const char *getPassName() const { return "ARM64 Promote Constant"; }
+
+  /// Iterate over the functions and promote the interesting constants into
+  /// global variables with module scope.
+  bool runOnModule(Module &M) {
+    DEBUG(dbgs() << getPassName() << '\n');
+    bool Changed = false;
+    for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+         ++IFn) {
+      Changed |= runOnFunction(*IFn);
+    }
+    return Changed;
+  }
+
+private:
+  /// Look for interesting constants used within the given function.
+  /// Promote them into global variables, load these global variables within
+  /// the related function, so that the number of inserted load is minimal.
+  bool runOnFunction(Function &F);
+
+  // This transformation requires dominator info
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+
+  /// Type to store a list of User
+  typedef SmallVector<Value::user_iterator, 4> Users;
+  /// Map an insertion point to all the uses it dominates.
+  typedef DenseMap<Instruction *, Users> InsertionPoints;
+  /// Map a function to the required insertion point of load for a
+  /// global variable
+  typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
+
+  /// Find the closest point that dominates the given Use.
+  Instruction *findInsertionPoint(Value::user_iterator &Use);
+
+  /// Check if the given insertion point is dominated by an existing
+  /// insertion point.
+  /// If true, the given use is added to the list of dominated uses for
+  /// the related existing point.
+  /// \param NewPt the insertion point to be checked
+  /// \param UseIt the use to be added into the list of dominated uses
+  /// \param InsertPts existing insertion points
+  /// \pre NewPt and all instruction in InsertPts belong to the same function
+  /// \retun true if one of the insertion point in InsertPts dominates NewPt,
+  ///        false otherwise
+  bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
+                   InsertionPoints &InsertPts);
+
+  /// Check if the given insertion point can be merged with an existing
+  /// insertion point in a common dominator.
+  /// If true, the given use is added to the list of the created insertion
+  /// point.
+  /// \param NewPt the insertion point to be checked
+  /// \param UseIt the use to be added into the list of dominated uses
+  /// \param InsertPts existing insertion points
+  /// \pre NewPt and all instruction in InsertPts belong to the same function
+  /// \pre isDominated returns false for the exact same parameters.
+  /// \retun true if it exists an insertion point in InsertPts that could
+  ///        have been merged with NewPt in a common dominator,
+  ///        false otherwise
+  bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
+                   InsertionPoints &InsertPts);
+
+  /// Compute the minimal insertion points to dominates all the interesting
+  /// uses of value.
+  /// Insertion points are group per function and each insertion point
+  /// contains a list of all the uses it dominates within the related function
+  /// \param Val constant to be examined
+  /// \param InsPtsPerFunc[out] output storage of the analysis
+  void computeInsertionPoints(Constant *Val,
+                              InsertionPointsPerFunc &InsPtsPerFunc);
+
+  /// Insert a definition of a new global variable at each point contained in
+  /// InsPtsPerFunc and update the related uses (also contained in
+  /// InsPtsPerFunc).
+  bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc);
+
+  /// Compute the minimal insertion points to dominate all the interesting
+  /// uses of Val and insert a definition of a new global variable
+  /// at these points.
+  /// Also update the uses of Val accordingly.
+  /// Currently a use of Val is considered interesting if:
+  /// - Val is not UndefValue
+  /// - Val is not zeroinitialized
+  /// - Replacing Val per a load of a global variable is valid.
+  /// \see shouldConvert for more details
+  bool computeAndInsertDefinitions(Constant *Val);
+
+  /// Promote the given constant into a global variable if it is expected to
+  /// be profitable.
+  /// \return true if Cst has been promoted
+  bool promoteConstant(Constant *Cst);
+
+  /// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
+  /// Append UseIt to this list and delete the entry of IPI in InsertPts.
+  static void appendAndTransferDominatedUses(Instruction *NewPt,
+                                             Value::user_iterator &UseIt,
+                                             InsertionPoints::iterator &IPI,
+                                             InsertionPoints &InsertPts) {
+    // Record the dominated use
+    IPI->second.push_back(UseIt);
+    // Transfer the dominated uses of IPI to NewPt
+    // Inserting into the DenseMap may invalidate existing iterator.
+    // Keep a copy of the key to find the iterator to erase.
+    Instruction *OldInstr = IPI->first;
+    InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
+    // Erase IPI
+    IPI = InsertPts.find(OldInstr);
+    InsertPts.erase(IPI);
+  }
+};
+} // end anonymous namespace
+
+char ARM64PromoteConstant::ID = 0;
+
+namespace llvm {
+void initializeARM64PromoteConstantPass(PassRegistry &);
+}
+
+INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const",
+                      "ARM64 Promote Constant Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const",
+                    "ARM64 Promote Constant Pass", false, false)
+
+ModulePass *llvm::createARM64PromoteConstantPass() {
+  return new ARM64PromoteConstant();
+}
+
+/// Check if the given type uses a vector type.
+static bool isConstantUsingVectorTy(const Type *CstTy) {
+  if (CstTy->isVectorTy())
+    return true;
+  if (CstTy->isStructTy()) {
+    for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements();
+         EltIdx < EndEltIdx; ++EltIdx)
+      if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx)))
+        return true;
+  } else if (CstTy->isArrayTy())
+    return isConstantUsingVectorTy(CstTy->getArrayElementType());
+  return false;
+}
+
+/// Check if the given use (Instruction + OpIdx) of Cst should be converted into
+/// a load of a global variable initialized with Cst.
+/// A use should be converted if it is legal to do so.
+/// For instance, it is not legal to turn the mask operand of a shuffle vector
+/// into a load of a global variable.
+static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
+                             unsigned OpIdx) {
+  // shufflevector instruction expects a const for the mask argument, i.e., the
+  // third argument. Do not promote this use in that case.
+  if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2)
+    return false;
+
+  // extractvalue instruction expects a const idx
+  if (isa<const ExtractValueInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // extractvalue instruction expects a const idx
+  if (isa<const InsertValueInst>(Instr) && OpIdx > 1)
+    return false;
+
+  if (isa<const AllocaInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // Alignment argument must be constant
+  if (isa<const LoadInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // Alignment argument must be constant
+  if (isa<const StoreInst>(Instr) && OpIdx > 1)
+    return false;
+
+  // Index must be constant
+  if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // Personality function and filters must be constant.
+  // Give up on that instruction.
+  if (isa<const LandingPadInst>(Instr))
+    return false;
+
+  // switch instruction expects constants to compare to
+  if (isa<const SwitchInst>(Instr))
+    return false;
+
+  // Expected address must be a constant
+  if (isa<const IndirectBrInst>(Instr))
+    return false;
+
+  // Do not mess with intrinsic
+  if (isa<const IntrinsicInst>(Instr))
+    return false;
+
+  // Do not mess with inline asm
+  const CallInst *CI = dyn_cast<const CallInst>(Instr);
+  if (CI && isa<const InlineAsm>(CI->getCalledValue()))
+    return false;
+
+  return true;
+}
+
+/// Check if the given Cst should be converted into
+/// a load of a global variable initialized with Cst.
+/// A constant should be converted if it is likely that the materialization of
+/// the constant will be tricky. Thus, we give up on zero or undef values.
+///
+/// \todo Currently, accept only vector related types.
+/// Also we give up on all simple vector type to keep the existing
+/// behavior. Otherwise, we should push here all the check of the lowering of
+/// BUILD_VECTOR. By giving up, we lose the potential benefit of merging
+/// constant via global merge and the fact that the same constant is stored
+/// only once with this method (versus, as many function that uses the constant
+/// for the regular approach, even for float).
+/// Again, the simplest solution would be to promote every
+/// constant and rematerialize them when they are actually cheap to create.
+static bool shouldConvert(const Constant *Cst) {
+  if (isa<const UndefValue>(Cst))
+    return false;
+
+  // FIXME: In some cases, it may be interesting to promote in memory
+  // a zero initialized constant.
+  // E.g., when the type of Cst require more instructions than the
+  // adrp/add/load sequence or when this sequence can be shared by several
+  // instances of Cst.
+  // Ideally, we could promote this into a global and rematerialize the constant
+  // when it was a bad idea.
+  if (Cst->isZeroValue())
+    return false;
+
+  if (Stress)
+    return true;
+
+  // FIXME: see function \todo
+  if (Cst->getType()->isVectorTy())
+    return false;
+  return isConstantUsingVectorTy(Cst->getType());
+}
+
+Instruction *
+ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
+  // If this user is a phi, the insertion point is in the related
+  // incoming basic block
+  PHINode *PhiInst = dyn_cast<PHINode>(*Use);
+  Instruction *InsertionPoint;
+  if (PhiInst)
+    InsertionPoint =
+        PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
+  else
+    InsertionPoint = dyn_cast<Instruction>(*Use);
+  assert(InsertionPoint && "User is not an instruction!");
+  return InsertionPoint;
+}
+
+bool ARM64PromoteConstant::isDominated(Instruction *NewPt,
+                                       Value::user_iterator &UseIt,
+                                       InsertionPoints &InsertPts) {
+
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
+      *NewPt->getParent()->getParent()).getDomTree();
+
+  // Traverse all the existing insertion point and check if one is dominating
+  // NewPt
+  for (InsertionPoints::iterator IPI = InsertPts.begin(),
+                                 EndIPI = InsertPts.end();
+       IPI != EndIPI; ++IPI) {
+    if (NewPt == IPI->first || DT.dominates(IPI->first, NewPt) ||
+        // When IPI->first is a terminator instruction, DT may think that
+        // the result is defined on the edge.
+        // Here we are testing the insertion point, not the definition.
+        (IPI->first->getParent() != NewPt->getParent() &&
+         DT.dominates(IPI->first->getParent(), NewPt->getParent()))) {
+      // No need to insert this point
+      // Record the dominated use
+      DEBUG(dbgs() << "Insertion point dominated by:\n");
+      DEBUG(IPI->first->print(dbgs()));
+      DEBUG(dbgs() << '\n');
+      IPI->second.push_back(UseIt);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
+                                       Value::user_iterator &UseIt,
+                                       InsertionPoints &InsertPts) {
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
+      *NewPt->getParent()->getParent()).getDomTree();
+  BasicBlock *NewBB = NewPt->getParent();
+
+  // Traverse all the existing insertion point and check if one is dominated by
+  // NewPt and thus useless or can be combined with NewPt into a common
+  // dominator
+  for (InsertionPoints::iterator IPI = InsertPts.begin(),
+                                 EndIPI = InsertPts.end();
+       IPI != EndIPI; ++IPI) {
+    BasicBlock *CurBB = IPI->first->getParent();
+    if (NewBB == CurBB) {
+      // Instructions are in the same block.
+      // By construction, NewPt is dominating the other.
+      // Indeed, isDominated returned false with the exact same arguments.
+      DEBUG(dbgs() << "Merge insertion point with:\n");
+      DEBUG(IPI->first->print(dbgs()));
+      DEBUG(dbgs() << "\nat considered insertion point.\n");
+      appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+      return true;
+    }
+
+    // Look for a common dominator
+    BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB);
+    // If none exists, we cannot merge these two points
+    if (!CommonDominator)
+      continue;
+
+    if (CommonDominator != NewBB) {
+      // By construction, the CommonDominator cannot be CurBB
+      assert(CommonDominator != CurBB &&
+             "Instruction has not been rejected during isDominated check!");
+      // Take the last instruction of the CommonDominator as insertion point
+      NewPt = CommonDominator->getTerminator();
+    }
+    // else, CommonDominator is the block of NewBB, hence NewBB is the last
+    // possible insertion point in that block
+    DEBUG(dbgs() << "Merge insertion point with:\n");
+    DEBUG(IPI->first->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+    DEBUG(NewPt->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+    appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+    return true;
+  }
+  return false;
+}
+
+void ARM64PromoteConstant::computeInsertionPoints(
+    Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
+  DEBUG(dbgs() << "** Compute insertion points **\n");
+  for (Value::user_iterator UseIt = Val->user_begin(),
+                            EndUseIt = Val->user_end();
+       UseIt != EndUseIt; ++UseIt) {
+    // If the user is not an Instruction, we cannot modify it
+    if (!isa<Instruction>(*UseIt))
+      continue;
+
+    // Filter out uses that should not be converted
+    if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
+      continue;
+
+    DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
+    DEBUG((*UseIt)->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+
+    Instruction *InsertionPoint = findInsertionPoint(UseIt);
+
+    DEBUG(dbgs() << "Considered insertion point:\n");
+    DEBUG(InsertionPoint->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+
+    // Check if the current insertion point is useless, i.e., it is dominated
+    // by another one.
+    InsertionPoints &InsertPts =
+        InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
+    if (isDominated(InsertionPoint, UseIt, InsertPts))
+      continue;
+    // This insertion point is useful, check if we can merge some insertion
+    // point in a common dominator or if NewPt dominates an existing one.
+    if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
+      continue;
+
+    DEBUG(dbgs() << "Keep considered insertion point\n");
+
+    // It is definitely useful by its own
+    InsertPts[InsertionPoint].push_back(UseIt);
+  }
+}
+
+bool
+ARM64PromoteConstant::insertDefinitions(Constant *Cst,
+                                        InsertionPointsPerFunc &InsPtsPerFunc) {
+  // We will create one global variable per Module
+  DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
+  bool HasChanged = false;
+
+  // Traverse all insertion points in all the function
+  for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
+                                        EndIt = InsPtsPerFunc.end();
+       FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
+    InsertionPoints &InsertPts = FctToInstPtsIt->second;
+// Do more check for debug purposes
+#ifndef NDEBUG
+    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
+        *FctToInstPtsIt->first).getDomTree();
+#endif
+    GlobalVariable *PromotedGV;
+    assert(!InsertPts.empty() && "Empty uses does not need a definition");
+
+    Module *M = FctToInstPtsIt->first->getParent();
+    DenseMap<Module *, GlobalVariable *>::iterator MapIt =
+        ModuleToMergedGV.find(M);
+    if (MapIt == ModuleToMergedGV.end()) {
+      PromotedGV = new GlobalVariable(
+          *M, Cst->getType(), true, GlobalValue::InternalLinkage, 0,
+          "_PromotedConst", 0, GlobalVariable::NotThreadLocal);
+      PromotedGV->setInitializer(Cst);
+      ModuleToMergedGV[M] = PromotedGV;
+      DEBUG(dbgs() << "Global replacement: ");
+      DEBUG(PromotedGV->print(dbgs()));
+      DEBUG(dbgs() << '\n');
+      ++NumPromoted;
+      HasChanged = true;
+    } else {
+      PromotedGV = MapIt->second;
+    }
+
+    for (InsertionPoints::iterator IPI = InsertPts.begin(),
+                                   EndIPI = InsertPts.end();
+         IPI != EndIPI; ++IPI) {
+      // Create the load of the global variable
+      IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
+      LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
+      DEBUG(dbgs() << "**********\n");
+      DEBUG(dbgs() << "New def: ");
+      DEBUG(LoadedCst->print(dbgs()));
+      DEBUG(dbgs() << '\n');
+
+      // Update the dominated uses
+      Users &DominatedUsers = IPI->second;
+      for (Users::iterator UseIt = DominatedUsers.begin(),
+                           EndIt = DominatedUsers.end();
+           UseIt != EndIt; ++UseIt) {
+#ifndef NDEBUG
+        assert((DT.dominates(LoadedCst, cast<Instruction>(**UseIt)) ||
+                (isa<PHINode>(**UseIt) &&
+                 DT.dominates(LoadedCst, findInsertionPoint(*UseIt)))) &&
+               "Inserted definition does not dominate all its uses!");
+#endif
+        DEBUG(dbgs() << "Use to update " << UseIt->getOperandNo() << ":");
+        DEBUG((*UseIt)->print(dbgs()));
+        DEBUG(dbgs() << '\n');
+        (*UseIt)->setOperand(UseIt->getOperandNo(), LoadedCst);
+        ++NumPromotedUses;
+      }
+    }
+  }
+  return HasChanged;
+}
+
+bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
+  InsertionPointsPerFunc InsertPtsPerFunc;
+  computeInsertionPoints(Val, InsertPtsPerFunc);
+  return insertDefinitions(Val, InsertPtsPerFunc);
+}
+
+bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
+  assert(Cst && "Given variable is not a valid constant.");
+
+  if (!shouldConvert(Cst))
+    return false;
+
+  DEBUG(dbgs() << "******************************\n");
+  DEBUG(dbgs() << "Candidate constant: ");
+  DEBUG(Cst->print(dbgs()));
+  DEBUG(dbgs() << '\n');
+
+  return computeAndInsertDefinitions(Cst);
+}
+
+bool ARM64PromoteConstant::runOnFunction(Function &F) {
+  // Look for instructions using constant vector
+  // Promote that constant to a global variable.
+  // Create as few load of this variable as possible and update the uses
+  // accordingly
+  bool LocalChange = false;
+  SmallSet<Constant *, 8> AlreadyChecked;
+
+  for (Function::iterator IBB = F.begin(), IEndBB = F.end(); IBB != IEndBB;
+       ++IBB) {
+    for (BasicBlock::iterator II = IBB->begin(), IEndI = IBB->end();
+         II != IEndI; ++II) {
+      // Traverse the operand, looking for constant vectors
+      // Replace them by a load of a global variable of type constant vector
+      for (unsigned OpIdx = 0, EndOpIdx = II->getNumOperands();
+           OpIdx != EndOpIdx; ++OpIdx) {
+        Constant *Cst = dyn_cast<Constant>(II->getOperand(OpIdx));
+        // There is no point is promoting global value, they are already global.
+        // Do not promote constant expression, as they may require some code
+        // expansion.
+        if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
+            AlreadyChecked.insert(Cst))
+          LocalChange |= promoteConstant(Cst);
+      }
+    }
+  }
+  return LocalChange;
+}

Added: llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,402 @@
+//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM64 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM64RegisterInfo.h"
+#include "ARM64FrameLowering.h"
+#include "ARM64InstrInfo.h"
+#include "ARM64Subtarget.h"
+#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "ARM64GenRegisterInfo.inc"
+
+using namespace llvm;
+
+ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii,
+                                     const ARM64Subtarget *sti)
+    : ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {}
+
+const uint16_t *
+ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
+    return CSR_ARM64_AllRegs_SaveList;
+  else
+    return CSR_ARM64_AAPCS_SaveList;
+}
+
+const uint32_t *
+ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+  if (CC == CallingConv::AnyReg)
+    return CSR_ARM64_AllRegs_RegMask;
+  else
+    return CSR_ARM64_AAPCS_RegMask;
+}
+
+const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const {
+  if (STI->isTargetDarwin())
+    return CSR_ARM64_TLS_Darwin_RegMask;
+
+  assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+  return CSR_ARM64_TLS_ELF_RegMask;
+}
+
+const uint32_t *
+ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+  // This should return a register mask that is the same as that returned by
+  // getCallPreservedMask but that additionally preserves the register used for
+  // the first i64 argument (which must also be the register used to return a
+  // single i64 return value)
+  //
+  // In case that the calling convention does not use the same register for
+  // both, the function should return NULL (does not currently apply)
+  return CSR_ARM64_AAPCS_ThisReturn_RegMask;
+}
+
+BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  // FIXME: avoid re-calculating this everytime.
+  BitVector Reserved(getNumRegs());
+  Reserved.set(ARM64::SP);
+  Reserved.set(ARM64::XZR);
+  Reserved.set(ARM64::WSP);
+  Reserved.set(ARM64::WZR);
+
+  if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+    Reserved.set(ARM64::FP);
+    Reserved.set(ARM64::W29);
+  }
+
+  if (STI->isTargetDarwin()) {
+    Reserved.set(ARM64::X18); // Platform register
+    Reserved.set(ARM64::W18);
+  }
+
+  if (hasBasePointer(MF)) {
+    Reserved.set(ARM64::X19);
+    Reserved.set(ARM64::W19);
+  }
+
+  return Reserved;
+}
+
+bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF,
+                                      unsigned Reg) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  switch (Reg) {
+  default:
+    break;
+  case ARM64::SP:
+  case ARM64::XZR:
+  case ARM64::WSP:
+  case ARM64::WZR:
+    return true;
+  case ARM64::X18:
+  case ARM64::W18:
+    return STI->isTargetDarwin();
+  case ARM64::FP:
+  case ARM64::W29:
+    return TFI->hasFP(MF) || STI->isTargetDarwin();
+  case ARM64::W19:
+  case ARM64::X19:
+    return hasBasePointer(MF);
+  }
+
+  return false;
+}
+
+const TargetRegisterClass *
+ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                      unsigned Kind) const {
+  return &ARM64::GPR64RegClass;
+}
+
+const TargetRegisterClass *
+ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+  if (RC == &ARM64::CCRRegClass)
+    return NULL; // Can't copy CPSR.
+  return RC;
+}
+
+unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; }
+
+bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // In the presence of variable sized objects, if the fixed stack size is
+  // large enough that referencing from the FP won't result in things being
+  // in range relatively often, we can use a base pointer to allow access
+  // from the other direction like the SP normally works.
+  if (MFI->hasVarSizedObjects()) {
+    // Conservatively estimate whether the negative offset from the frame
+    // pointer will be sufficient to reach. If a function has a smallish
+    // frame, it's less likely to have lots of spills and callee saved
+    // space, so it's all more likely to be within range of the frame pointer.
+    // If it's wrong, we'll materialize the constant and still get to the
+    // object; it's just suboptimal. Negative offsets use the unscaled
+    // load/store instructions, which have a 9-bit signed immediate.
+    if (MFI->getLocalFrameSize() < 256)
+      return false;
+    return true;
+  }
+
+  return false;
+}
+
+unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  return TFI->hasFP(MF) ? ARM64::FP : ARM64::SP;
+}
+
+bool
+ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+  return true;
+}
+
+bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF)
+    const {
+  return true;
+}
+
+bool
+ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // ARM64FrameLowering::resolveFrameIndexReference() can always fall back
+  // to the stack pointer, so only put the emergency spill slot next to the
+  // FP when there's no better way to access it (SP or base pointer).
+  return MFI->hasVarSizedObjects() && !hasBasePointer(MF);
+}
+
+bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF)
+    const {
+  return true;
+}
+
+bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Only consider eliminating leaf frames.
+  if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) &&
+                          MFI->adjustsStack()))
+    return true;
+  return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
+                                          int64_t Offset) const {
+  for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
+    assert(i < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+
+  // It's the load/store FI references that cause issues, as it can be difficult
+  // to materialize the offset if it won't fit in the literal field. Estimate
+  // based on the size of the local frame and some conservative assumptions
+  // about the rest of the stack frame (note, this is pre-regalloc, so
+  // we don't know everything for certain yet) whether this offset is likely
+  // to be out of range of the immediate. Return true if so.
+
+  // We only generate virtual base registers for loads and stores, so
+  // return false for everything else.
+  if (!MI->mayLoad() && !MI->mayStore())
+    return false;
+
+  // Without a virtual base register, if the function has variable sized
+  // objects, all fixed-size local references will be via the frame pointer,
+  // Approximate the offset and see if it's legal for the instruction.
+  // Note that the incoming offset is based on the SP value at function entry,
+  // so it'll be negative.
+  MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Estimate an offset from the frame pointer.
+  // Conservatively assume all GPR callee-saved registers get pushed.
+  // FP, LR, X19-X28, D8-D15. 64-bits each.
+  int64_t FPOffset = Offset - 16 * 20;
+  // Estimate an offset from the stack pointer.
+  // The incoming offset is relating to the SP at the start of the function,
+  // but when we access the local it'll be relative to the SP after local
+  // allocation, so adjust our SP-relative offset by that allocation size.
+  Offset += MFI->getLocalFrameSize();
+  // Assume that we'll have at least some spill slots allocated.
+  // FIXME: This is a total SWAG number. We should run some statistics
+  //        and pick a real one.
+  Offset += 128; // 128 bytes of spill slots
+
+  // If there is a frame pointer, try using it.
+  // The FP is only available if there is no dynamic realignment. We
+  // don't know for sure yet whether we'll need that, so we guess based
+  // on whether there are any local variables that would trigger it.
+  if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+    return false;
+
+  // If we can reference via the stack pointer or base pointer, try that.
+  // FIXME: This (and the code that resolves the references) can be improved
+  //        to only disallow SP relative references in the live range of
+  //        the VLA(s). In practice, it's unclear how much difference that
+  //        would make, but it may be worth doing.
+  if (isFrameOffsetLegal(MI, Offset))
+    return false;
+
+  // The offset likely isn't legal; we want to allocate a virtual base register.
+  return true;
+}
+
+bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+                                           int64_t Offset) const {
+  assert(Offset <= INT_MAX && "Offset too big to fit in int.");
+  assert(MI && "Unable to get the legal offset for nil instruction.");
+  int SaveOffset = Offset;
+  return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal;
+}
+
+/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
+/// at the beginning of the basic block.
+void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                                                     unsigned BaseReg,
+                                                     int FrameIdx,
+                                                     int64_t Offset) const {
+  MachineBasicBlock::iterator Ins = MBB->begin();
+  DebugLoc DL; // Defaults to "unknown"
+  if (Ins != MBB->end())
+    DL = Ins->getDebugLoc();
+
+  const MCInstrDesc &MCID = TII->get(ARM64::ADDXri);
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const MachineFunction &MF = *MBB->getParent();
+  MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
+  unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
+
+  BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+      .addFrameIndex(FrameIdx)
+      .addImm(Offset)
+      .addImm(Shifter);
+}
+
+void ARM64RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+                                          unsigned BaseReg,
+                                          int64_t Offset) const {
+  MachineInstr &MI = *I;
+  int Off = Offset; // ARM doesn't need the general 64-bit offsets
+  unsigned i = 0;
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+  bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII);
+  assert(Done && "Unable to resolve frame index!");
+  (void)Done;
+}
+
+void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                            int SPAdj, unsigned FIOperandNum,
+                                            RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Unexpected");
+
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const ARM64FrameLowering *TFI = static_cast<const ARM64FrameLowering *>(
+      MF.getTarget().getFrameLowering());
+
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  unsigned FrameReg;
+  int Offset;
+
+  // Special handling of dbg_value, stackmap and patchpoint instructions.
+  if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
+      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+    Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
+                                             /*PreferFP=*/true);
+    Offset += MI.getOperand(FIOperandNum + 1).getImm();
+    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+    return;
+  }
+
+  // Modify MI as necessary to handle as much of 'Offset' as possible
+  Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
+  if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
+    return;
+
+  assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
+         "Emergency spill slot is out of reach");
+
+  // If we get here, the immediate doesn't fit into the instruction.  We folded
+  // as much as possible above.  Handle the rest, providing a register that is
+  // SP+LargeImm.
+  unsigned ScratchReg =
+      MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass);
+  emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
+  MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
+}
+
+namespace llvm {
+
+unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+                                                MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  switch (RC->getID()) {
+  default:
+    return 0;
+  case ARM64::GPR32RegClassID:
+  case ARM64::GPR32spRegClassID:
+  case ARM64::GPR32allRegClassID:
+  case ARM64::GPR64spRegClassID:
+  case ARM64::GPR64allRegClassID:
+  case ARM64::GPR64RegClassID:
+  case ARM64::GPR32commonRegClassID:
+  case ARM64::GPR64commonRegClassID:
+    return 32 - 1                                      // XZR/SP
+           - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
+           - STI->isTargetDarwin() // X18 reserved as platform register
+           - hasBasePointer(MF);   // X19
+  case ARM64::FPR8RegClassID:
+  case ARM64::FPR16RegClassID:
+  case ARM64::FPR32RegClassID:
+  case ARM64::FPR64RegClassID:
+  case ARM64::FPR128RegClassID:
+    return 32;
+
+  case ARM64::DDRegClassID:
+  case ARM64::DDDRegClassID:
+  case ARM64::DDDDRegClassID:
+  case ARM64::QQRegClassID:
+  case ARM64::QQQRegClassID:
+  case ARM64::QQQQRegClassID:
+    return 32;
+
+  case ARM64::FPR128_loRegClassID:
+    return 16;
+  }
+}
+
+} // namespace llvm

Added: llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.h (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,89 @@
+//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM64 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM64REGISTERINFO_H
+#define LLVM_TARGET_ARM64REGISTERINFO_H
+
+#define GET_REGINFO_HEADER
+#include "ARM64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class ARM64InstrInfo;
+class ARM64Subtarget;
+class MachineFunction;
+class RegScavenger;
+class TargetRegisterClass;
+
+struct ARM64RegisterInfo : public ARM64GenRegisterInfo {
+private:
+  const ARM64InstrInfo *TII;
+  const ARM64Subtarget *STI;
+
+public:
+  ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti);
+
+  /// Code Generation virtual methods...
+  bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+  const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+  // Calls involved in thread-local variable lookup save more registers than
+  // normal calls, so they need a different mask to represent this.
+  const uint32_t *getTLSCallPreservedMask() const;
+
+  /// getThisReturnPreservedMask - Returns a call preserved mask specific to the
+  /// case that 'returned' is on an i64 first argument if the calling convention
+  /// is one that can (partially) model this attribute with a preserved mask
+  /// (i.e. it is a calling convention that uses the same register for the first
+  /// i64 argument and an i64 return value)
+  ///
+  /// Should return NULL in the case that the calling convention does not have
+  /// this property
+  const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+  const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+                                                unsigned Kind = 0) const;
+  const TargetRegisterClass *
+  getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+  bool requiresRegisterScavenging(const MachineFunction &MF) const;
+  bool useFPForScavengingIndex(const MachineFunction &MF) const;
+  bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+  bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+  void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
+                                    int FrameIdx, int64_t Offset) const;
+  void resolveFrameIndex(MachineBasicBlock::iterator I, unsigned BaseReg,
+                         int64_t Offset) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                           unsigned FIOperandNum,
+                           RegScavenger *RS = NULL) const;
+
+  bool cannotEliminateFrame(const MachineFunction &MF) const;
+  bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+  bool hasBasePointer(const MachineFunction &MF) const;
+  unsigned getBaseRegister() const;
+
+  // Debug information queries.
+  unsigned getFrameRegister(const MachineFunction &MF) const;
+
+  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                               MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_ARM64REGISTERINFO_H

Added: llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.td?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.td (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64RegisterInfo.td Sat Mar 29 05:18:08 2014
@@ -0,0 +1,561 @@
+//===- ARM64RegisterInfo.td - Describe the ARM64 Regisers --*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+
+class ARM64Reg<bits<16> enc, string n, list<Register> subregs = [],
+               list<string> altNames = []>
+        : Register<n, altNames> {
+  let HWEncoding = enc;
+  let Namespace = "ARM64";
+  let SubRegs = subregs;
+}
+
+let Namespace = "ARM64" in {
+  def sub_32 : SubRegIndex<32>;
+
+  def bsub : SubRegIndex<8>;
+  def hsub : SubRegIndex<16>;
+  def ssub : SubRegIndex<32>;
+  def dsub : SubRegIndex<32>;
+  def qhisub : SubRegIndex<64>;
+  def qsub : SubRegIndex<64>;
+  // Note: Code depends on these having consecutive numbers
+  def dsub0 : SubRegIndex<64>;
+  def dsub1 : SubRegIndex<64>;
+  def dsub2 : SubRegIndex<64>;
+  def dsub3 : SubRegIndex<64>;
+  // Note: Code depends on these having consecutive numbers
+  def qsub0 : SubRegIndex<128>;
+  def qsub1 : SubRegIndex<128>;
+  def qsub2 : SubRegIndex<128>;
+  def qsub3 : SubRegIndex<128>;
+}
+
+let Namespace = "ARM64" in {
+  def vreg : RegAltNameIndex;
+  def vlist1 : RegAltNameIndex;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+def W0    : ARM64Reg<0,   "w0" >, DwarfRegNum<[0]>;
+def W1    : ARM64Reg<1,   "w1" >, DwarfRegNum<[1]>;
+def W2    : ARM64Reg<2,   "w2" >, DwarfRegNum<[2]>;
+def W3    : ARM64Reg<3,   "w3" >, DwarfRegNum<[3]>;
+def W4    : ARM64Reg<4,   "w4" >, DwarfRegNum<[4]>;
+def W5    : ARM64Reg<5,   "w5" >, DwarfRegNum<[5]>;
+def W6    : ARM64Reg<6,   "w6" >, DwarfRegNum<[6]>;
+def W7    : ARM64Reg<7,   "w7" >, DwarfRegNum<[7]>;
+def W8    : ARM64Reg<8,   "w8" >, DwarfRegNum<[8]>;
+def W9    : ARM64Reg<9,   "w9" >, DwarfRegNum<[9]>;
+def W10   : ARM64Reg<10, "w10">, DwarfRegNum<[10]>;
+def W11   : ARM64Reg<11, "w11">, DwarfRegNum<[11]>;
+def W12   : ARM64Reg<12, "w12">, DwarfRegNum<[12]>;
+def W13   : ARM64Reg<13, "w13">, DwarfRegNum<[13]>;
+def W14   : ARM64Reg<14, "w14">, DwarfRegNum<[14]>;
+def W15   : ARM64Reg<15, "w15">, DwarfRegNum<[15]>;
+def W16   : ARM64Reg<16, "w16">, DwarfRegNum<[16]>;
+def W17   : ARM64Reg<17, "w17">, DwarfRegNum<[17]>;
+def W18   : ARM64Reg<18, "w18">, DwarfRegNum<[18]>;
+def W19   : ARM64Reg<19, "w19">, DwarfRegNum<[19]>;
+def W20   : ARM64Reg<20, "w20">, DwarfRegNum<[20]>;
+def W21   : ARM64Reg<21, "w21">, DwarfRegNum<[21]>;
+def W22   : ARM64Reg<22, "w22">, DwarfRegNum<[22]>;
+def W23   : ARM64Reg<23, "w23">, DwarfRegNum<[23]>;
+def W24   : ARM64Reg<24, "w24">, DwarfRegNum<[24]>;
+def W25   : ARM64Reg<25, "w25">, DwarfRegNum<[25]>;
+def W26   : ARM64Reg<26, "w26">, DwarfRegNum<[26]>;
+def W27   : ARM64Reg<27, "w27">, DwarfRegNum<[27]>;
+def W28   : ARM64Reg<28, "w28">, DwarfRegNum<[28]>;
+def W29   : ARM64Reg<29, "w29">, DwarfRegNum<[29]>;
+def W30   : ARM64Reg<30, "w30">, DwarfRegNum<[30]>;
+def WSP   : ARM64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR   : ARM64Reg<31, "wzr">, DwarfRegAlias<WSP>;
+
+let SubRegIndices = [sub_32] in {
+def X0    : ARM64Reg<0,   "x0",  [W0]>, DwarfRegAlias<W0>;
+def X1    : ARM64Reg<1,   "x1",  [W1]>, DwarfRegAlias<W1>;
+def X2    : ARM64Reg<2,   "x2",  [W2]>, DwarfRegAlias<W2>;
+def X3    : ARM64Reg<3,   "x3",  [W3]>, DwarfRegAlias<W3>;
+def X4    : ARM64Reg<4,   "x4",  [W4]>, DwarfRegAlias<W4>;
+def X5    : ARM64Reg<5,   "x5",  [W5]>, DwarfRegAlias<W5>;
+def X6    : ARM64Reg<6,   "x6",  [W6]>, DwarfRegAlias<W6>;
+def X7    : ARM64Reg<7,   "x7",  [W7]>, DwarfRegAlias<W7>;
+def X8    : ARM64Reg<8,   "x8",  [W8]>, DwarfRegAlias<W8>;
+def X9    : ARM64Reg<9,   "x9",  [W9]>, DwarfRegAlias<W9>;
+def X10   : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>;
+def X11   : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>;
+def X12   : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>;
+def X13   : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>;
+def X14   : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>;
+def X15   : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>;
+def X16   : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>;
+def X17   : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>;
+def X18   : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>;
+def X19   : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>;
+def X20   : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>;
+def X21   : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>;
+def X22   : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>;
+def X23   : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>;
+def X24   : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>;
+def X25   : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>;
+def X26   : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>;
+def X27   : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>;
+def X28   : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>;
+def FP    : ARM64Reg<29, "fp",  [W29]>, DwarfRegAlias<W29>;
+def LR    : ARM64Reg<30, "lr",  [W30]>, DwarfRegAlias<W30>;
+def SP    : ARM64Reg<31, "sp",  [WSP]>, DwarfRegAlias<WSP>;
+def XZR   : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>;
+}
+
+// Condition code register.
+def CPSR  : ARM64Reg<0, "cpsr">;
+
+// GPR register classes with the intersections of GPR32/GPR32sp and
+// GPR64/GPR64sp for use by the coalescer.
+def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> {
+  let AltOrders = [(rotl GPR32common, 8)];
+  let AltOrderSelect = [{ return 1; }];
+}
+def GPR64common : RegisterClass<"ARM64", [i64], 64,
+                                (add (sequence "X%u", 0, 28), FP, LR)> {
+  let AltOrders = [(rotl GPR64common, 8)];
+  let AltOrderSelect = [{ return 1; }];
+}
+// GPR register classes which exclude SP/WSP.
+def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> {
+  let AltOrders = [(rotl GPR32, 8)];
+  let AltOrderSelect = [{ return 1; }];
+}
+def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> {
+  let AltOrders = [(rotl GPR64, 8)];
+  let AltOrderSelect = [{ return 1; }];
+}
+
+// GPR register classes which include SP/WSP.
+def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> {
+  let AltOrders = [(rotl GPR32sp, 8)];
+  let AltOrderSelect = [{ return 1; }];
+}
+def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> {
+  let AltOrders = [(rotl GPR64sp, 8)];
+  let AltOrderSelect = [{ return 1; }];
+}
+
+// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
+// constraint used by any instructions, it is used as a common super-class.
+def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>;
+def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// This is for indirect tail calls to store the address of the destination.
+def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21,
+                                                     X22, X23, X24, X25, X26,
+                                                     X27, X28)>;
+
+// GPR register classes for post increment ammount of vector load/store that
+// has alternate printing when Rm=31 and prints a constant immediate value
+// equal to the total number of bytes transferred.
+def GPR64pi1  : RegisterOperand<GPR64, "printPostIncOperand1">;
+def GPR64pi2  : RegisterOperand<GPR64, "printPostIncOperand2">;
+def GPR64pi3  : RegisterOperand<GPR64, "printPostIncOperand3">;
+def GPR64pi4  : RegisterOperand<GPR64, "printPostIncOperand4">;
+def GPR64pi6  : RegisterOperand<GPR64, "printPostIncOperand6">;
+def GPR64pi8  : RegisterOperand<GPR64, "printPostIncOperand8">;
+def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand12">;
+def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand16">;
+def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand24">;
+def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand32">;
+def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand48">;
+def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand64">;
+
+// Condition code regclass.
+def CCR : RegisterClass<"ARM64", [i32], 32, (add CPSR)> {
+  let CopyCost = -1;  // Don't allow copying of status registers.
+
+  // CCR is not allocatable.
+  let isAllocatable = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Scalar Registers
+//===----------------------------------------------------------------------===//
+
+def B0    : ARM64Reg<0,   "b0">, DwarfRegNum<[64]>;
+def B1    : ARM64Reg<1,   "b1">, DwarfRegNum<[65]>;
+def B2    : ARM64Reg<2,   "b2">, DwarfRegNum<[66]>;
+def B3    : ARM64Reg<3,   "b3">, DwarfRegNum<[67]>;
+def B4    : ARM64Reg<4,   "b4">, DwarfRegNum<[68]>;
+def B5    : ARM64Reg<5,   "b5">, DwarfRegNum<[69]>;
+def B6    : ARM64Reg<6,   "b6">, DwarfRegNum<[70]>;
+def B7    : ARM64Reg<7,   "b7">, DwarfRegNum<[71]>;
+def B8    : ARM64Reg<8,   "b8">, DwarfRegNum<[72]>;
+def B9    : ARM64Reg<9,   "b9">, DwarfRegNum<[73]>;
+def B10   : ARM64Reg<10, "b10">, DwarfRegNum<[74]>;
+def B11   : ARM64Reg<11, "b11">, DwarfRegNum<[75]>;
+def B12   : ARM64Reg<12, "b12">, DwarfRegNum<[76]>;
+def B13   : ARM64Reg<13, "b13">, DwarfRegNum<[77]>;
+def B14   : ARM64Reg<14, "b14">, DwarfRegNum<[78]>;
+def B15   : ARM64Reg<15, "b15">, DwarfRegNum<[79]>;
+def B16   : ARM64Reg<16, "b16">, DwarfRegNum<[80]>;
+def B17   : ARM64Reg<17, "b17">, DwarfRegNum<[81]>;
+def B18   : ARM64Reg<18, "b18">, DwarfRegNum<[82]>;
+def B19   : ARM64Reg<19, "b19">, DwarfRegNum<[83]>;
+def B20   : ARM64Reg<20, "b20">, DwarfRegNum<[84]>;
+def B21   : ARM64Reg<21, "b21">, DwarfRegNum<[85]>;
+def B22   : ARM64Reg<22, "b22">, DwarfRegNum<[86]>;
+def B23   : ARM64Reg<23, "b23">, DwarfRegNum<[87]>;
+def B24   : ARM64Reg<24, "b24">, DwarfRegNum<[88]>;
+def B25   : ARM64Reg<25, "b25">, DwarfRegNum<[89]>;
+def B26   : ARM64Reg<26, "b26">, DwarfRegNum<[90]>;
+def B27   : ARM64Reg<27, "b27">, DwarfRegNum<[91]>;
+def B28   : ARM64Reg<28, "b28">, DwarfRegNum<[92]>;
+def B29   : ARM64Reg<29, "b29">, DwarfRegNum<[93]>;
+def B30   : ARM64Reg<30, "b30">, DwarfRegNum<[94]>;
+def B31   : ARM64Reg<31, "b31">, DwarfRegNum<[95]>;
+
+let SubRegIndices = [bsub] in {
+def H0    : ARM64Reg<0,   "h0", [B0]>, DwarfRegAlias<B0>;
+def H1    : ARM64Reg<1,   "h1", [B1]>, DwarfRegAlias<B1>;
+def H2    : ARM64Reg<2,   "h2", [B2]>, DwarfRegAlias<B2>;
+def H3    : ARM64Reg<3,   "h3", [B3]>, DwarfRegAlias<B3>;
+def H4    : ARM64Reg<4,   "h4", [B4]>, DwarfRegAlias<B4>;
+def H5    : ARM64Reg<5,   "h5", [B5]>, DwarfRegAlias<B5>;
+def H6    : ARM64Reg<6,   "h6", [B6]>, DwarfRegAlias<B6>;
+def H7    : ARM64Reg<7,   "h7", [B7]>, DwarfRegAlias<B7>;
+def H8    : ARM64Reg<8,   "h8", [B8]>, DwarfRegAlias<B8>;
+def H9    : ARM64Reg<9,   "h9", [B9]>, DwarfRegAlias<B9>;
+def H10   : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>;
+def H11   : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>;
+def H12   : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>;
+def H13   : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>;
+def H14   : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>;
+def H15   : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>;
+def H16   : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>;
+def H17   : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>;
+def H18   : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>;
+def H19   : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>;
+def H20   : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>;
+def H21   : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>;
+def H22   : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>;
+def H23   : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>;
+def H24   : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>;
+def H25   : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>;
+def H26   : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>;
+def H27   : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>;
+def H28   : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>;
+def H29   : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>;
+def H30   : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>;
+def H31   : ARM64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>;
+}
+
+let SubRegIndices = [hsub] in {
+def S0    : ARM64Reg<0,   "s0", [H0]>, DwarfRegAlias<B0>;
+def S1    : ARM64Reg<1,   "s1", [H1]>, DwarfRegAlias<B1>;
+def S2    : ARM64Reg<2,   "s2", [H2]>, DwarfRegAlias<B2>;
+def S3    : ARM64Reg<3,   "s3", [H3]>, DwarfRegAlias<B3>;
+def S4    : ARM64Reg<4,   "s4", [H4]>, DwarfRegAlias<B4>;
+def S5    : ARM64Reg<5,   "s5", [H5]>, DwarfRegAlias<B5>;
+def S6    : ARM64Reg<6,   "s6", [H6]>, DwarfRegAlias<B6>;
+def S7    : ARM64Reg<7,   "s7", [H7]>, DwarfRegAlias<B7>;
+def S8    : ARM64Reg<8,   "s8", [H8]>, DwarfRegAlias<B8>;
+def S9    : ARM64Reg<9,   "s9", [H9]>, DwarfRegAlias<B9>;
+def S10   : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>;
+def S11   : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>;
+def S12   : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>;
+def S13   : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>;
+def S14   : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>;
+def S15   : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>;
+def S16   : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>;
+def S17   : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>;
+def S18   : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>;
+def S19   : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>;
+def S20   : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>;
+def S21   : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>;
+def S22   : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>;
+def S23   : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>;
+def S24   : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>;
+def S25   : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>;
+def S26   : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>;
+def S27   : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>;
+def S28   : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>;
+def S29   : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>;
+def S30   : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>;
+def S31   : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>;
+}
+
+let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in {
+def D0    : ARM64Reg<0,   "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>;
+def D1    : ARM64Reg<1,   "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>;
+def D2    : ARM64Reg<2,   "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>;
+def D3    : ARM64Reg<3,   "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>;
+def D4    : ARM64Reg<4,   "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>;
+def D5    : ARM64Reg<5,   "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>;
+def D6    : ARM64Reg<6,   "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>;
+def D7    : ARM64Reg<7,   "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>;
+def D8    : ARM64Reg<8,   "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>;
+def D9    : ARM64Reg<9,   "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>;
+def D10   : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>;
+def D11   : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>;
+def D12   : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>;
+def D13   : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>;
+def D14   : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>;
+def D15   : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>;
+def D16   : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>;
+def D17   : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>;
+def D18   : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>;
+def D19   : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>;
+def D20   : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>;
+def D21   : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>;
+def D22   : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>;
+def D23   : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>;
+def D24   : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>;
+def D25   : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>;
+def D26   : ARM64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>;
+def D27   : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>;
+def D28   : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>;
+def D29   : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>;
+def D30   : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>;
+def D31   : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>;
+}
+
+let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in {
+def Q0    : ARM64Reg<0,   "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>;
+def Q1    : ARM64Reg<1,   "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>;
+def Q2    : ARM64Reg<2,   "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>;
+def Q3    : ARM64Reg<3,   "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>;
+def Q4    : ARM64Reg<4,   "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>;
+def Q5    : ARM64Reg<5,   "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>;
+def Q6    : ARM64Reg<6,   "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>;
+def Q7    : ARM64Reg<7,   "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>;
+def Q8    : ARM64Reg<8,   "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>;
+def Q9    : ARM64Reg<9,   "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>;
+def Q10   : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>;
+def Q11   : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>;
+def Q12   : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>;
+def Q13   : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>;
+def Q14   : ARM64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias<B14>;
+def Q15   : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>;
+def Q16   : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>;
+def Q17   : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>;
+def Q18   : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>;
+def Q19   : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>;
+def Q20   : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>;
+def Q21   : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>;
+def Q22   : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>;
+def Q23   : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>;
+def Q24   : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>;
+def Q25   : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>;
+def Q26   : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>;
+def Q27   : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>;
+def Q28   : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>;
+def Q29   : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>;
+def Q30   : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
+def Q31   : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
+}
+
+def FPR8  : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> {
+  let Size = 8;
+}
+def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> {
+  let Size = 16;
+}
+def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
+def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
+                                    v1i64],
+                                    64, (sequence "D%u", 0, 31)>;
+// We don't (yet) have an f128 legal type, so don't use that here. We
+// normalize 128-bit vectors to v2f64 for arg passing and such, so use
+// that here.
+def FPR128 : RegisterClass<"ARM64",
+                           [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
+                           128, (sequence "Q%u", 0, 31)>;
+
+// The lower 16 vector registers.  Some instructions can only take registers
+// in this range.
+def FPR128_lo : RegisterClass<"ARM64",
+                              [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+                              128, (trunc FPR128, 16)>;
+
+// Pairs, triples, and quads of 64-bit vector registers.
+def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
+def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
+                                 [(rotl FPR64, 0), (rotl FPR64, 1),
+                                  (rotl FPR64, 2)]>;
+def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3],
+                               [(rotl FPR64, 0), (rotl FPR64, 1),
+                                (rotl FPR64, 2), (rotl FPR64, 3)]>;
+def DD   : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> {
+  let Size = 128;
+}
+def DDD  : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> {
+  let Size = 196;
+}
+def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> {
+  let Size = 256;
+}
+
+// Pairs, triples, and quads of 128-bit vector registers.
+def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>;
+def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2],
+                                 [(rotl FPR128, 0), (rotl FPR128, 1),
+                                  (rotl FPR128, 2)]>;
+def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3],
+                               [(rotl FPR128, 0), (rotl FPR128, 1),
+                                (rotl FPR128, 2), (rotl FPR128, 3)]>;
+def QQ   : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> {
+  let Size = 256;
+}
+def QQQ  : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> {
+  let Size = 384;
+}
+def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> {
+  let Size = 512;
+}
+
+
+// Vector operand versions of the FP registers. Alternate name printing and
+// assmebler matching.
+def VectorRegAsmOperand : AsmOperandClass { let Name = "VectorReg"; }
+let ParserMatchClass = VectorRegAsmOperand in {
+def V64  : RegisterOperand<FPR64, "printVRegOperand">;
+def V128 : RegisterOperand<FPR128, "printVRegOperand">;
+def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand">;
+}
+
+class TypedVecListAsmOperand<int count, int regsize, int lanes, string kind>
+    : AsmOperandClass {
+  let Name = "TypedVectorList" # count # "_" # lanes # kind;
+
+  let PredicateMethod
+      = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>";
+  let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">";
+}
+
+class TypedVecListRegOperand<RegisterClass Reg, int lanes, string kind>
+    : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '"
+                                                   # kind # "'>">;
+
+multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> {
+  // With implicit types (probably on instruction instead). E.g. { v0, v1 }
+  def _64AsmOperand : AsmOperandClass {
+    let Name = NAME # "64";
+    let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
+    let RenderMethod = "addVectorList64Operands<" # count # ">";
+  }
+
+  def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand");
+  }
+
+  def _128AsmOperand : AsmOperandClass {
+    let Name = NAME # "128";
+    let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
+    let RenderMethod = "addVectorList128Operands<" # count # ">";
+  }
+
+  def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand");
+  }
+
+  // 64-bit register lists with explicit type.
+
+  // { v0.8b, v1.8b }
+  def _8bAsmOperand : TypedVecListAsmOperand<count, 64, 8, "b">;
+  def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand");
+  }
+
+  // { v0.4h, v1.4h }
+  def _4hAsmOperand : TypedVecListAsmOperand<count, 64, 4, "h">;
+  def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand");
+  }
+
+  // { v0.2s, v1.2s }
+  def _2sAsmOperand : TypedVecListAsmOperand<count, 64, 2, "s">;
+  def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand");
+  }
+
+  // { v0.1d, v1.1d }
+  def _1dAsmOperand : TypedVecListAsmOperand<count, 64, 1, "d">;
+  def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand");
+  }
+
+  // 128-bit register lists with explicit type
+
+  // { v0.16b, v1.16b }
+  def _16bAsmOperand : TypedVecListAsmOperand<count, 128, 16, "b">;
+  def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand");
+  }
+
+  // { v0.8h, v1.8h }
+  def _8hAsmOperand : TypedVecListAsmOperand<count, 128, 8, "h">;
+  def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand");
+  }
+
+  // { v0.4s, v1.4s }
+  def _4sAsmOperand : TypedVecListAsmOperand<count, 128, 4, "s">;
+  def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand");
+  }
+
+  // { v0.2d, v1.2d }
+  def _2dAsmOperand : TypedVecListAsmOperand<count, 128, 2, "d">;
+  def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2dAsmOperand");
+  }
+
+  // { v0.b, v1.b }
+  def _bAsmOperand : TypedVecListAsmOperand<count, 128, 0, "b">;
+  def "b" : TypedVecListRegOperand<Reg128, 0, "b"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand");
+  }
+
+  // { v0.h, v1.h }
+  def _hAsmOperand : TypedVecListAsmOperand<count, 128, 0, "h">;
+  def "h" : TypedVecListRegOperand<Reg128, 0, "h"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand");
+  }
+
+  // { v0.s, v1.s }
+  def _sAsmOperand : TypedVecListAsmOperand<count, 128, 0, "s">;
+  def "s" : TypedVecListRegOperand<Reg128, 0, "s"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand");
+  }
+
+  // { v0.d, v1.d }
+  def _dAsmOperand : TypedVecListAsmOperand<count, 128, 0, "d">;
+  def "d" : TypedVecListRegOperand<Reg128, 0, "d"> {
+    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand");
+  }
+
+
+}
+
+defm VecListOne   : VectorList<1, FPR64, FPR128>;
+defm VecListTwo   : VectorList<2, DD,    QQ>;
+defm VecListThree : VectorList<3, DDD,   QQQ>;
+defm VecListFour  : VectorList<4, DDDD,  QQQQ>;
+
+
+// Register operand versions of the scalar FP registers.
+def FPR16Op : RegisterOperand<FPR16, "printOperand">;
+def FPR32Op : RegisterOperand<FPR32, "printOperand">;
+def FPR64Op : RegisterOperand<FPR64, "printOperand">;
+def FPR128Op : RegisterOperand<FPR128, "printOperand">;

Added: llvm/trunk/lib/Target/ARM64/ARM64SchedCyclone.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64SchedCyclone.td?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64SchedCyclone.td (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64SchedCyclone.td Sat Mar 29 05:18:08 2014
@@ -0,0 +1,852 @@
+//=- ARMSchedCyclone.td - ARM64 Cyclone Scheduling Defs ------*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM64 Cyclone to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def CycloneModel : SchedMachineModel {
+  let IssueWidth = 6; // 6 micro-ops are dispatched per cycle.
+  let MicroOpBufferSize = 192; // Based on the reorder buffer.
+  let LoadLatency = 4; // Optimistic load latency.
+  let MispredictPenalty = 16; // 14-19 cycles are typical.
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cyclone.
+
+// 4 integer pipes
+def CyUnitI : ProcResource<4> {
+  let BufferSize = 48;
+}
+
+// 2 branch units: I[0..1]
+def CyUnitB : ProcResource<2> {
+  let Super  = CyUnitI;
+  let BufferSize = 24;
+}
+
+// 1 indirect-branch unit: I[0]
+def CyUnitBR : ProcResource<1> {
+  let Super  = CyUnitB;
+}
+
+// 2 shifter pipes: I[2..3]
+// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI
+def CyUnitIS : ProcResource<2> {
+  let Super = CyUnitI;
+  let BufferSize = 24;
+}
+
+// 1 mul pipe: I[0]
+def CyUnitIM : ProcResource<1> {
+  let Super = CyUnitBR;
+  let BufferSize = 32;
+}
+
+// 1 div pipe: I[1]
+def CyUnitID : ProcResource<1> {
+  let Super = CyUnitB;
+  let BufferSize = 16;
+}
+
+// 1 integer division unit. This is driven by the ID pipe, but only
+// consumes the pipe for one cycle at issue and another cycle at writeback.
+def CyUnitIntDiv : ProcResource<1>;
+
+// 2 ld/st pipes.
+def CyUnitLS : ProcResource<2> {
+  let BufferSize = 28;
+}
+
+// 3 fp/vector pipes.
+def CyUnitV : ProcResource<3> {
+  let BufferSize = 48;
+}
+// 2 fp/vector arithmetic and multiply pipes: V[0-1]
+def CyUnitVM : ProcResource<2> {
+  let Super = CyUnitV;
+  let BufferSize = 32;
+}
+// 1 fp/vector division/sqrt pipe: V[2]
+def CyUnitVD : ProcResource<1> {
+  let Super = CyUnitV;
+  let BufferSize = 16;
+}
+// 1 fp compare pipe: V[0]
+def CyUnitVC : ProcResource<1> {
+  let Super = CyUnitVM;
+  let BufferSize = 16;
+}
+
+// 2 fp division/square-root units.  These are driven by the VD pipe,
+// but only consume the pipe for one cycle at issue and a cycle at writeback.
+def CyUnitFloatDiv : ProcResource<2>;
+
+//===----------------------------------------------------------------------===//
+// Define scheduler read/write resources and latency on Cyclone.
+// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1.
+
+let SchedModel = CycloneModel in {
+
+//---
+// 7.8.1. Moves
+//---
+
+// A single nop micro-op (uX).
+def WriteX : SchedWriteRes<[]> { let Latency = 0; }
+
+// Move zero is a register rename (to machine register zero).
+// The move is replaced by a single nop micro-op.
+// MOVZ Rd, #0
+// AND Rd, Rzr, #imm
+def WriteZPred : SchedPredicate<[{TII->isGPRZero(MI)}]>;
+def WriteImmZ  : SchedWriteVariant<[
+                   SchedVar<WriteZPred, [WriteX]>,
+                   SchedVar<NoSchedPred, [WriteImm]>]>;
+def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>;
+
+// Move GPR is a register rename and single nop micro-op.
+// ORR Xd, XZR, Xm
+// ADD Xd, Xn, #0
+def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(MI)}]>;
+def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(MI)}]>;
+def WriteMov      : SchedWriteVariant<[
+                      SchedVar<WriteIMovPred, [WriteX]>,
+                      SchedVar<WriteVMovPred, [WriteX]>,
+                      SchedVar<NoSchedPred,   [WriteI]>]>;
+def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>;
+
+// Move non-zero immediate is an integer ALU op.
+// MOVN,MOVZ,MOVK
+def : WriteRes<WriteImm, [CyUnitI]>;
+
+//---
+// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional,
+//              Shifts and Bitfield Operations
+//---
+
+// ADR,ADRP
+// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri
+// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr
+// ADC(S),SBC(S)
+// Aliases: CMN, CMP, TST
+//
+// Conditional operations.
+// CCMNi,CCMPi,CCMNr,CCMPr,
+// CSEL,CSINC,CSINV,CSNEG
+//
+// Bit counting and reversal operations.
+// CLS,CLZ,RBIT,REV,REV16,REV32
+def : WriteRes<WriteI, [CyUnitI]>;
+
+// ADD with shifted register operand is a single micro-op that
+// consumes a shift pipeline for two cycles.
+// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs
+// EXAMPLE: ADDrs Xn, Xm LSL #imm
+def : WriteRes<WriteISReg, [CyUnitIS]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+
+// ADD with extended register operand is the same as shifted reg operand.
+// ADD(S)re,SUB(S)re
+// EXAMPLE: ADDXre Xn, Xm, UXTB #1
+def : WriteRes<WriteIEReg, [CyUnitIS]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+
+// Variable shift and bitfield operations.
+// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM
+def : WriteRes<WriteIS, [CyUnitIS]>;
+
+// EXTR Shifts a pair of registers and requires two micro-ops.
+// The second micro-op is delayed, as modeled by ReadExtrHi.
+// EXTR Xn, Xm, #imm
+def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+}
+
+// EXTR's first register read is delayed by one cycle, effectively
+// shortening its writer's latency.
+// EXTR Xn, Xm, #imm
+def : ReadAdvance<ReadExtrHi, 1>;
+
+//---
+// 7.8.6. Multiplies
+//---
+
+// MUL/MNEG are aliases for MADD/MSUB.
+// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL
+def : WriteRes<WriteIM32, [CyUnitIM]> {
+  let Latency = 4;
+}
+// MADDX,MSUBX,SMULH,UMULH
+def : WriteRes<WriteIM64, [CyUnitIM]> {
+  let Latency = 5;
+}
+
+//---
+// 7.8.7. Divide
+//---
+
+// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient.
+// The ID pipe is consumed for 2 cycles: issue and writeback.
+// SDIVW,UDIVW
+def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> {
+  let Latency = 10;
+  let ResourceCycles = [2, 10];
+}
+// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient.
+// The ID pipe is consumed for 2 cycles: issue and writeback.
+// SDIVX,UDIVX
+def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> {
+  let Latency = 13;
+  let ResourceCycles = [2, 13];
+}
+
+//---
+// 7.8.8,7.8.10. Load/Store, single element
+//---
+
+// Integer loads take 4 cycles and use one LS unit for one cycle.
+def : WriteRes<WriteLD, [CyUnitLS]> {
+  let Latency = 4;
+}
+
+// Store-load forwarding is 4 cycles.
+//
+// Note: The store-exclusive sequence incorporates this
+// latency. However, general heuristics should not model the
+// dependence between a store and subsequent may-alias load because
+// hardware speculation works.
+def : WriteRes<WriteST, [CyUnitLS]> {
+  let Latency = 4;
+}
+
+// Load from base address plus an optionally scaled register offset.
+// Rt latency is latency WriteIS + WriteLD.
+// EXAMPLE: LDR Xn, Xm [, lsl 3]
+def CyWriteLDIdx : SchedWriteVariant<[
+  SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register.
+  SchedVar<NoSchedPred,   [WriteLD]>]>;        // Load from register offset.
+def : SchedAlias<WriteLDIdx, CyWriteLDIdx>;    // Map ARM64->Cyclone type.
+
+// EXAMPLE: STR Xn, Xm [, lsl 3]
+def CyWriteSTIdx : SchedWriteVariant<[
+  SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register.
+  SchedVar<NoSchedPred,   [WriteST]>]>;        // Store to register offset.
+def : SchedAlias<WriteSTIdx, CyWriteSTIdx>;    // Map ARM64->Cyclone type.
+
+// Read the (unshifted) base register Xn in the second micro-op one cycle later.
+// EXAMPLE: LDR Xn, Xm [, lsl 3]
+def ReadBaseRS : SchedReadAdvance<1>;
+def CyReadAdrBase : SchedReadVariant<[
+  SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
+  SchedVar<NoSchedPred,   [ReadDefault]>]>;   // Read base reg with no shift.
+def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map ARM64->Cyclone type.
+
+//---
+// 7.8.9,7.8.11. Load/Store, paired
+//---
+
+// Address pre/post increment is a simple ALU op with one cycle latency.
+def : WriteRes<WriteAdr, [CyUnitI]>;
+
+// LDP high register write is fused with the load, but a nop micro-op remains.
+def : WriteRes<WriteLDHi, []> {
+  let Latency = 4;
+}
+
+// STP is a vector op and store, except for QQ, which is just two stores.
+def : SchedAlias<WriteSTP, WriteVSTShuffle>;
+def : InstRW<[WriteST, WriteST], (instrs STPQi)>;
+
+//---
+// 7.8.13. Branches
+//---
+
+// Branches take a single micro-op.
+// The misprediction penalty is defined as a SchedMachineModel property.
+def : WriteRes<WriteBr,    [CyUnitB]>  {let Latency = 0;}
+def : WriteRes<WriteBrReg, [CyUnitBR]> {let Latency = 0;}
+
+//---
+// 7.8.14. Never-issued Instructions, Barrier and Hint Operations
+//---
+
+// NOP,SEV,SEVL,WFE,WFI,YIELD
+def : WriteRes<WriteHint, []> {let Latency = 0;}
+// ISB
+def : InstRW<[WriteI], (instrs ISB)>;
+// SLREX,DMB,DSB
+def : WriteRes<WriteBarrier, [CyUnitLS]>;
+
+// System instructions get an invalid latency because the latency of
+// other operations across them is meaningless.
+def : WriteRes<WriteSys, []> {let Latency = -1;}
+
+//===----------------------------------------------------------------------===//
+// 7.9 Vector Unit Instructions
+
+// Simple vector operations take 2 cycles.
+def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
+
+// Define some longer latency vector op types for Cyclone.
+def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
+def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;}
+def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;}
+def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;}
+
+// Simple floating-point operations take 2 cycles.
+def : WriteRes<WriteF, [CyUnitV]> {let Latency = 2;}
+
+//---
+// 7.9.1 Vector Moves
+//---
+
+// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently
+// generates expensive int-float conversion instead:
+// FMOVDi Dd, #0.0
+// FMOVv2f64ns Vd.2d, #0.0
+
+// FMOVSi,FMOVDi
+def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
+
+// MOVI,MVNI are WriteV
+// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV
+
+// Move FPR is a register rename and single nop micro-op.
+// ORR.16b Vd,Vn,Vn
+// COPY is handled above in the WriteMov Variant.
+def WriteVMov    : SchedWriteVariant<[
+                     SchedVar<WriteVMovPred, [WriteX]>,
+                     SchedVar<NoSchedPred,   [WriteV]>]>;
+def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
+
+// FMOVSr,FMOVDr are WriteF.
+
+// MOV V,V is a WriteV.
+
+// CPY D,V[x] is a WriteV
+
+// INS V[x],V[y] is a WriteV.
+
+// FMOVWSr,FMOVXDr,FMOVXDHighr
+def : SchedAlias<WriteFCopy, WriteVLD>;
+
+// FMOVSWr,FMOVDXr
+def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
+
+// INS V[x],R
+def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
+def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
+
+// SMOV,UMOV R,V[x]
+def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>;
+def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>;
+
+// DUP V,R
+def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>;
+
+// DUP V,V[x] is a WriteV.
+
+//---
+// 7.9.2 Integer Arithmetic, Logical, and Comparisons
+//---
+
+// BIC,ORR V,#imm are WriteV
+
+def : InstRW<[CyWriteV3], (instregex "ABSv")>;
+
+// MVN,NEG,NOT are WriteV
+
+def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>;
+
+// ADDP is a WriteV.
+def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
+def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>;
+
+def : InstRW<[CyWriteV3],
+             (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>;
+
+// ADD,SUB are WriteV
+
+// Forward declare.
+def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
+
+// Add/Diff and accumulate uses the vector multiply unit.
+def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
+def CyReadVAccum  : SchedReadAdvance<1,
+                    [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>;
+
+def : InstRW<[CyWriteVAccum, CyReadVAccum],
+             (instregex "SADALP","UADALP")>;
+
+def : InstRW<[CyWriteVAccum, CyReadVAccum],
+             (instregex "SABAv","UABAv","SABALv","UABALv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>;
+
+def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>;
+
+// WriteV includes:
+// AND,BIC,CMTST,EOR,ORN,ORR
+// ADDP
+// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD
+// SADDL,SSUBL,UADDL,USUBL
+// SADDW,SSUBW,UADDW,USUBW
+
+def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv",
+                                     "CMLEv","CMLTv",
+                                     "CMHIv","CMHSv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv",
+                                     "SMAXPv","SMINPv","UMAXPv","UMINPv")>;
+
+def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv",
+                                       "SABDLv","UABDLv")>;
+
+//---
+// 7.9.3 Floating Point Arithmetic and Comparisons
+//---
+
+// FABS,FNEG are WriteF
+
+def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>;
+def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>;
+
+def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i",
+                                     "FMINPv2i","FMINNMPv2i")>;
+
+def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>;
+
+def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32,
+                                  FSUBSrr,FSUBv2f32,FSUBv4f32,
+                                  FADDPv2f32,FADDPv4f32,
+                                  FABD32,FABDv2f32,FABDv4f32)>;
+def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64,
+                                  FSUBDrr,FSUBv2f64,
+                                  FADDPv2f64,
+                                  FABD64,FABDv2f64)>;
+
+def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>;
+
+def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT",
+                                     "FMAXS","FMAXD","FMAXv",
+                                     "FMINS","FMIND","FMINv",
+                                     "FMAXNMS","FMAXNMD","FMAXNMv",
+                                     "FMINNMS","FMINNMD","FMINNMv",
+                                     "FMAXPv2f","FMAXPv4f",
+                                     "FMINPv2f","FMINPv4f",
+                                     "FMAXNMPv2f","FMAXNMPv4f",
+                                     "FMINNMPv2f","FMINNMPv4f")>;
+
+// FCMP,FCMPE,FCCMP,FCCMPE
+def : WriteRes<WriteFCmp, [CyUnitVC]> {let Latency = 4;}
+
+// FCSEL is a WriteF.
+
+//---
+// 7.9.4 Shifts and Bitfield Operations
+//---
+
+// SHL is a WriteV
+
+def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
+def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>;
+
+def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
+def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>;
+
+// Shift and accumulate uses the vector multiply unit.
+def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
+def CyReadVShiftAcc  : SchedReadAdvance<1,
+                        [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>;
+def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc],
+             (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
+
+// SSHL,USHL are WriteV.
+
+def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>;
+
+// SQSHL,SQSHLU,UQSHL are WriteV.
+
+def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
+
+// WriteV includes:
+// SHLL,SSHLL,USHLL
+// SLI,SRI
+// BIF,BIT,BSL
+// EXT
+// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
+// XTN2
+
+def : InstRW<[CyWriteV4],
+             (instregex "RSHRNv","SHRNv",
+                        "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv",
+                        "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+
+//---
+// 7.9.5 Multiplication
+//---
+
+def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;}
+def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv",
+                             "SQDMULLv","SQDMULHv","SQRDMULHv")>;
+
+// FMUL,FMULX,FNMUL default to WriteFMul.
+def : WriteRes<WriteFMul, [CyUnitVM]> { let Latency = 4;}
+
+def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;}
+def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed,
+                               FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>;
+
+def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>;
+def : InstRW<[CyWriteVMul, CyReadVMulAcc],
+             (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL",
+              "SQDMLAL","SQDMLSL")>;
+
+def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;}
+def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;}
+def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>;
+def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>;
+
+def : InstRW<[CyWriteSMul, CyReadSMul],
+             (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr,
+              FMLAv2f32,FMLAv4f32,
+              FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>;
+def : InstRW<[CyWriteDMul, CyReadDMul],
+             (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr,
+              FMLAv2f64,FMLAv2i64_indexed,
+              FMLSv2f64,FMLSv2i64_indexed)>;
+
+def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; }
+def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>;
+
+//---
+// 7.9.6 Divide and Square Root
+//---
+
+// FDIV,FSQRT
+// TODO: Add 64-bit variant with 19 cycle latency.
+// TODO: Specialize FSQRT for longer latency.
+def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> {
+  let Latency = 17;
+  let ResourceCycles = [2, 17];
+}
+
+def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>;
+
+def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; }
+def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>;
+
+def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; }
+def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; }
+def : InstRW<[WriteFRECPS],  (instregex "FRECPSv")>;
+def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
+
+//---
+// 7.9.7 Integer-FP Conversions
+//---
+
+// FCVT lengthen f16/s32
+def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
+
+// FCVT,FCVTN,FCVTXN
+// SCVTF,UCVTF V,V
+// FRINT(AIMNPXZ) V,V
+def : WriteRes<WriteFCvt, [CyUnitV]> {let Latency = 4;}
+
+// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles.
+def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>;
+def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>;
+
+// FCVT Rd, S/D = V6+LD4: 10 cycles
+def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>;
+def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>;
+
+// FCVTL is a WriteV
+
+//---
+// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup
+//---
+
+def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;}
+def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr,
+                                       AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr,
+                                       SHA1SU0rrr)>;
+
+def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;}
+def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>;
+
+def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;}
+def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr,
+                                       SHA256Hrrr,SHA256H2rrr)>;
+
+// TRN,UZP,ZUP are WriteV.
+
+// TBL,TBX are WriteV.
+
+//---
+// 7.9.11-7.9.14 Load/Store, single element and paired
+//---
+
+// Loading into the vector unit takes 5 cycles vs 4 for integer loads.
+def : WriteRes<WriteVLD, [CyUnitLS]> {
+  let Latency = 5;
+}
+
+// Store-load forwarding is 4 cycles.
+def : WriteRes<WriteVST, [CyUnitLS]> {
+  let Latency = 4;
+}
+
+// WriteVLDPair/VSTPair sequences are expanded by the target description.
+
+//---
+// 7.9.15 Load, element operations
+//---
+
+// Only the first WriteVLD and WriteAdr for writeback matches def operands.
+// Subsequent WriteVLDs consume resources. Since all loaded values have the
+// same latency, this is acceptable.
+
+// Vd is read 5 cycles after issuing the vector load.
+def : ReadAdvance<ReadVLD, 5>;
+
+def : InstRW<[WriteVLD],
+             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr],
+             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
+
+// Register writes from the load's high half are fused micro-ops.
+def : InstRW<[WriteVLD],
+             (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD, WriteAdr],
+             (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVLD, WriteVLD],
+             (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
+             (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLD, WriteVLD],
+             (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
+             (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVLD, WriteVLD, WriteVLD],
+             (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD],
+             (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLD, WriteVLD],
+             (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
+             (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD],
+             (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD],
+             (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD],
+             (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],
+             (instregex "LD1i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD],          (instrs LD1i64)>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>;
+
+def : InstRW<[WriteVLDShuffle],
+             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr],
+             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[WriteVLDShuffle, WriteV],
+             (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+             (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
+             (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
+             (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+             (instregex "LD2i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+             (instregex "LD2i(8|16|32)_POST")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+             (instregex "LD2i64$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+             (instregex "LD2i64_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteV],
+             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+             (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+             (instregex "LD3Threev(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
+             (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
+             (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
+             (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
+             (instregex "LD3i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
+             (instregex "LD3i64$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+             (instregex "LD3i64_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
+             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
+             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+             (instrs LD3Rv1d,LD3Rv2d)>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+             (instrs LD3Rv2d_POST,LD3Rv2d_POST)>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+             (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+             (instregex "LD4Fourv(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
+              WriteVLDPairShuffle, WriteVLDPairShuffle],
+             (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
+              WriteVLDPairShuffle, WriteVLDPairShuffle],
+             (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
+             (instregex "LD4i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
+             (instregex "LD4i(8|16|32)_POST")>;
+
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
+             (instrs LD4i64)>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+             (instrs LD4i64_POST)>;
+
+def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
+             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
+             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+             (instrs LD4Rv1d,LD4Rv2d)>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+             (instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
+
+//---
+// 7.9.16 Store, element operations
+//---
+
+// Only the WriteAdr for writeback matches a def operands.
+// Subsequent WriteVLDs only consume resources.
+
+def : InstRW<[WriteVST],
+             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST],
+             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle],
+             (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle],
+             (instregex "ST1Twov(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVST, WriteVST],
+             (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST, WriteVST],
+             (instregex "ST1Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle, WriteVST],
+             (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST],
+             (instregex "ST1Threev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST],
+             (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST],
+             (instregex "ST1Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST],
+             (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST],
+             (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle],           (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVSTShuffle],           (instrs ST1i64)>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>;
+
+def : InstRW<[WriteVSTShuffle],
+             (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle],
+             (instregex "ST2Twov(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST2Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle],           (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>;
+def : InstRW<[WriteVSTShuffle],           (instrs ST2i64)>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>;
+
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST3Threev(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
+             (instregex "ST3Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle],           (instregex "ST3i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>;
+
+def :InstRW<[WriteVSTShuffle, WriteVSTShuffle],           (instrs ST3i64)>;
+def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>;
+
+def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle],
+            (instregex "ST4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle],
+            (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle,
+              WriteVSTPairShuffle, WriteVSTPairShuffle],
+             (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle,
+              WriteVSTPairShuffle, WriteVSTPairShuffle],
+             (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTPairShuffle],           (instregex "ST4i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],          (instrs ST4i64)>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
+
+} // SchedModel = CycloneModel

Added: llvm/trunk/lib/Target/ARM64/ARM64Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64Schedule.td?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64Schedule.td (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64Schedule.td Sat Mar 29 05:18:08 2014
@@ -0,0 +1,92 @@
+//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Define TII for use in SchedVariant Predicates.
+// const MachineInstr *MI and const TargetSchedModel *SchedModel
+// are defined by default.
+def : PredicateProlog<[{
+  const ARM64InstrInfo *TII =
+    static_cast<const ARM64InstrInfo*>(SchedModel->getInstrInfo());
+  (void)TII;
+}]>;
+
+// ARM64 Scheduler Definitions
+
+def WriteImm       : SchedWrite; // MOVN, MOVZ
+// TODO: Provide variants for MOV32/64imm Pseudos that dynamically
+// select the correct sequence of WriteImms.
+
+def WriteI         : SchedWrite; // ALU
+def WriteISReg     : SchedWrite; // ALU of Shifted-Reg
+def WriteIEReg     : SchedWrite; // ALU of Extended-Reg
+def WriteExtr      : SchedWrite; // EXTR shifts a reg pair
+def ReadExtrHi     : SchedRead;  // Read the high reg of the EXTR pair
+def WriteIS        : SchedWrite; // Shift/Scale
+def WriteID32      : SchedWrite; // 32-bit Divide
+def WriteID64      : SchedWrite; // 64-bit Divide
+def WriteIM32      : SchedWrite; // 32-bit Multiply
+def WriteIM64      : SchedWrite; // 64-bit Multiply
+def WriteBr        : SchedWrite; // Branch
+def WriteBrReg     : SchedWrite; // Indirect Branch
+
+def WriteLD        : SchedWrite; // Load from base addr plus immediate offset
+def WriteST        : SchedWrite; // Store to base addr plus immediate offset
+def WriteSTP       : SchedWrite; // Store a register pair.
+def WriteAdr       : SchedWrite; // Address pre/post increment.
+
+def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
+def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
+def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST.
+
+// ScaledIdxPred is true if a WriteLDIdx operand will be
+// scaled. Subtargets can use this to dynamically select resources and
+// latency for WriteLDIdx and ReadAdrBase.
+def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>;
+
+// Serialized two-level address load.
+// EXAMPLE: LOADGot
+def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
+
+// Serialized two-level address lookup.
+// EXAMPLE: MOVaddr...
+def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>;
+
+// The second register of a load-pair.
+// LDP,LDPSW,LDNP,LDXP,LDAXP
+def WriteLDHi : SchedWrite;
+
+// Store-exclusive is a store followed by a dependent load.
+def WriteSTX : WriteSequence<[WriteST, WriteLD]>;
+
+def WriteSys     : SchedWrite; // Long, variable latency system ops.
+def WriteBarrier : SchedWrite; // Memory barrier.
+def WriteHint    : SchedWrite; // Hint instruction.
+
+def WriteF       : SchedWrite; // General floating-point ops.
+def WriteFCmp    : SchedWrite; // Floating-point compare.
+def WriteFCvt    : SchedWrite; // Float conversion.
+def WriteFCopy   : SchedWrite; // Float-int register copy.
+def WriteFImm    : SchedWrite; // Floating-point immediate.
+def WriteFMul    : SchedWrite; // Floating-point multiply.
+def WriteFDiv    : SchedWrite; // Floating-point division.
+
+def WriteV   : SchedWrite; // Vector ops.
+def WriteVLD : SchedWrite; // Vector loads.
+def WriteVST : SchedWrite; // Vector stores.
+
+// Read the unwritten lanes of the VLD's destination registers.
+def ReadVLD : SchedRead;
+
+// Sequential vector load and shuffle.
+def WriteVLDShuffle     : WriteSequence<[WriteVLD, WriteV]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+
+// Store a shuffled vector.
+def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;

Added: llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,57 @@
+//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm64-selectiondag-info"
+#include "ARM64TargetMachine.h"
+using namespace llvm;
+
+ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM)
+    : TargetSelectionDAGInfo(TM),
+      Subtarget(&TM.getSubtarget<ARM64Subtarget>()) {}
+
+ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {}
+
+SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset(
+    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, unsigned Align, bool isVolatile,
+    MachinePointerInfo DstPtrInfo) const {
+  // Check to see if there is a specialized entry-point for memory zeroing.
+  ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+  ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
+  const char *bzeroEntry =
+      (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0;
+  // For small size (< 256), it is not beneficial to use bzero
+  // instead of memset.
+  if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
+    const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>(
+                                          DAG.getTarget().getTargetLowering());
+
+    EVT IntPtr = TLI.getPointerTy();
+    Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry;
+    Entry.Node = Dst;
+    Entry.Ty = IntPtrTy;
+    Args.push_back(Entry);
+    Entry.Node = Size;
+    Args.push_back(Entry);
+    TargetLowering::CallLoweringInfo CLI(
+        Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
+        0, CallingConv::C, /*isTailCall=*/false,
+        /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
+        DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
+    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+    return CallResult.second;
+  }
+  return SDValue();
+}

Added: llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.h (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64SelectionDAGInfo.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,38 @@
+//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM64 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM64SELECTIONDAGINFO_H
+#define ARM64SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo {
+  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const ARM64Subtarget *Subtarget;
+
+public:
+  explicit ARM64SelectionDAGInfo(const TargetMachine &TM);
+  ~ARM64SelectionDAGInfo();
+
+  virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
+                                          SDValue Chain, SDValue Dst,
+                                          SDValue Src, SDValue Size,
+                                          unsigned Align, bool isVolatile,
+                                          MachinePointerInfo DstPtrInfo) const;
+};
+}
+
+#endif

Added: llvm/trunk/lib/Target/ARM64/ARM64StorePairSuppress.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64StorePairSuppress.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64StorePairSuppress.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64StorePairSuppress.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,169 @@
+//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies floating point stores that should not be combined into
+// store pairs. Later we may do the same for floating point loads.
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm64-stp-suppress"
+#include "ARM64InstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class ARM64StorePairSuppress : public MachineFunctionPass {
+  const ARM64InstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  MachineFunction *MF;
+  TargetSchedModel SchedModel;
+  MachineTraceMetrics *Traces;
+  MachineTraceMetrics::Ensemble *MinInstr;
+
+public:
+  static char ID;
+  ARM64StorePairSuppress() : MachineFunctionPass(ID) {}
+
+  virtual const char *getPassName() const {
+    return "ARM64 Store Pair Suppression";
+  }
+
+  bool runOnMachineFunction(MachineFunction &F);
+
+private:
+  bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
+
+  bool isNarrowFPStore(const MachineInstr *MI);
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineTraceMetrics>();
+    AU.addPreserved<MachineTraceMetrics>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+char ARM64StorePairSuppress::ID = 0;
+} // anonymous
+
+FunctionPass *llvm::createARM64StorePairSuppressPass() {
+  return new ARM64StorePairSuppress();
+}
+
+/// Return true if an STP can be added to this block without increasing the
+/// critical resource height. STP is good to form in Ld/St limited blocks and
+/// bad to form in float-point limited blocks. This is true independent of the
+/// critical path. If the critical path is longer than the resource height, the
+/// extra vector ops can limit physreg renaming. Otherwise, it could simply
+/// oversaturate the vector units.
+bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
+  if (!MinInstr)
+    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+  MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
+  unsigned ResLength = BBTrace.getResourceLength();
+
+  // Get the machine model's scheduling class for STPQi.
+  // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
+  unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass();
+  const MCSchedClassDesc *SCDesc =
+      SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
+
+  // If a subtarget does not define resources for STPQi, bail here.
+  if (SCDesc->isValid() && !SCDesc->isVariant()) {
+    unsigned ResLenWithSTP = BBTrace.getResourceLength(
+        ArrayRef<const MachineBasicBlock *>(), SCDesc);
+    if (ResLenWithSTP > ResLength) {
+      DEBUG(dbgs() << "  Suppress STP in BB: " << BB->getNumber()
+                   << " resources " << ResLength << " -> " << ResLenWithSTP
+                   << "\n");
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Return true if this is a floating-point store smaller than the V reg. On
+/// cyclone, these require a vector shuffle before storing a pair.
+/// Ideally we would call getMatchingPairOpcode() and have the machine model
+/// tell us if it's profitable with no cpu knowledge here.
+///
+/// FIXME: We plan to develop a decent Target abstraction for simple loads and
+/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer.
+bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case ARM64::STRSui:
+  case ARM64::STRDui:
+  case ARM64::STURSi:
+  case ARM64::STURDi:
+    return true;
+  }
+}
+
+bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  TII = static_cast<const ARM64InstrInfo *>(MF->getTarget().getInstrInfo());
+  TRI = MF->getTarget().getRegisterInfo();
+  MRI = &MF->getRegInfo();
+  const TargetSubtargetInfo &ST =
+      MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+  SchedModel.init(*ST.getSchedModel(), &ST, TII);
+
+  Traces = &getAnalysis<MachineTraceMetrics>();
+  MinInstr = 0;
+
+  DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
+
+  if (!SchedModel.hasInstrSchedModel()) {
+    DEBUG(dbgs() << "  Skipping pass: no machine model present.\n");
+    return false;
+  }
+
+  // Check for a sequence of stores to the same base address. We don't need to
+  // precisely determine whether a store pair can be formed. But we do want to
+  // filter out most situations where we can't form store pairs to avoid
+  // computing trace metrics in those cases.
+  for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); BI != BE;
+       ++BI) {
+    bool SuppressSTP = false;
+    unsigned PrevBaseReg = 0;
+    for (MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E;
+         ++I) {
+      if (!isNarrowFPStore(I))
+        continue;
+      unsigned BaseReg;
+      unsigned Offset;
+      if (TII->getLdStBaseRegImmOfs(I, BaseReg, Offset, TRI)) {
+        if (PrevBaseReg == BaseReg) {
+          // If this block can take STPs, skip ahead to the next block.
+          if (!SuppressSTP && shouldAddSTPToBlock(I->getParent()))
+            break;
+          // Otherwise, continue unpairing the stores in this block.
+          DEBUG(dbgs() << "Unpairing store " << *I << "\n");
+          SuppressSTP = true;
+          TII->suppressLdStPair(I);
+        }
+        PrevBaseReg = BaseReg;
+      } else
+        PrevBaseReg = 0;
+    }
+  }
+  // This pass just sets some internal MachineMemOperand flags. It can't really
+  // invalidate anything.
+  return false;
+}

Added: llvm/trunk/lib/Target/ARM64/ARM64Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64Subtarget.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64Subtarget.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64Subtarget.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,83 @@
+//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM64 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM64InstrInfo.h"
+#include "ARM64Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "ARM64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU,
+                               const std::string &FS)
+    : ARM64GenSubtargetInfo(TT, CPU, FS), HasZeroCycleRegMove(false),
+      HasZeroCycleZeroing(false), CPUString(CPU), TargetTriple(TT) {
+  // Determine default and user-specified characteristics
+
+  if (CPUString.empty())
+    // We default to Cyclone for now.
+    CPUString = "cyclone";
+
+  ParseSubtargetFeatures(CPUString, FS);
+}
+
+/// ClassifyGlobalReference - Find the target operand flags that describe
+/// how a global value should be referenced for the current subtarget.
+unsigned char
+ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
+                                        const TargetMachine &TM) const {
+
+  // Determine whether this is a reference to a definition or a declaration.
+  // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+  // load from stub.
+  bool isDecl = GV->hasAvailableExternallyLinkage();
+  if (GV->isDeclaration() && !GV->isMaterializable())
+    isDecl = true;
+
+  // If symbol visibility is hidden, the extra load is not needed if
+  // the symbol is definitely defined in the current translation unit.
+  if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility() &&
+      (isDecl || GV->isWeakForLinker()))
+    return ARM64II::MO_GOT;
+
+  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
+    return ARM64II::MO_GOT;
+
+  // FIXME: this will fail on static ELF for weak symbols.
+  return ARM64II::MO_NO_FLAG;
+}
+
+/// This function returns the name of a function which has an interface
+/// like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered prefereable over
+/// memset with zero passed as the second argument. Otherwise it
+/// returns null.
+const char *ARM64Subtarget::getBZeroEntry() const {
+  // At the moment, always prefer bzero.
+  return "bzero";
+}
+
+void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+                                         MachineInstr *begin, MachineInstr *end,
+                                         unsigned NumRegionInstrs) const {
+  // LNT run (at least on Cyclone) showed reasonably significant gains for
+  // bi-directional scheduling. 253.perlbmk.
+  Policy.OnlyTopDown = false;
+  Policy.OnlyBottomUp = false;
+}

Added: llvm/trunk/lib/Target/ARM64/ARM64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64Subtarget.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64Subtarget.h (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64Subtarget.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,87 @@
+//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM64 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM64SUBTARGET_H
+#define ARM64SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "ARM64RegisterInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "ARM64GenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+
+class ARM64Subtarget : public ARM64GenSubtargetInfo {
+protected:
+  // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
+  bool HasZeroCycleRegMove;
+
+  // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
+  bool HasZeroCycleZeroing;
+
+  /// CPUString - String name of used CPU.
+  std::string CPUString;
+
+  /// TargetTriple - What processor and OS we're targeting.
+  Triple TargetTriple;
+
+public:
+  /// This constructor initializes the data members to match that
+  /// of the specified triple.
+  ARM64Subtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS);
+
+  virtual bool enableMachineScheduler() const { return true; }
+
+  bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
+
+  bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+
+  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+
+  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+
+  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
+  bool isCyclone() const { return CPUString == "cyclone"; }
+
+  /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+  /// that still makes it profitable to inline the call.
+  unsigned getMaxInlineSizeThreshold() const { return 64; }
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options.  Definition of function is auto generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  /// ClassifyGlobalReference - Find the target operand flags that describe
+  /// how a global value should be referenced for the current subtarget.
+  unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+                                        const TargetMachine &TM) const;
+
+  /// This function returns the name of a function which has an interface
+  /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered prefereable over
+  /// memset with zero passed as the second argument. Otherwise it
+  /// returns null.
+  const char *getBZeroEntry() const;
+
+  void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin,
+                           MachineInstr *end, unsigned NumRegionInstrs) const;
+};
+} // End llvm namespace
+
+#endif // ARM64SUBTARGET_H

Added: llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,157 @@
+//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM64.h"
+#include "ARM64TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+static cl::opt<bool> EnableCCMP("arm64-ccmp",
+                                cl::desc("Enable the CCMP formation pass"),
+                                cl::init(true));
+
+static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden,
+                                          cl::desc("Suppress STP for ARM64"),
+                                          cl::init(true));
+
+static cl::opt<bool>
+EnablePromoteConstant("arm64-promote-const", cl::Hidden,
+                      cl::desc("Enable the promote constant pass"),
+                      cl::init(true));
+
+static cl::opt<bool>
+EnableCollectLOH("arm64-collect-loh", cl::Hidden,
+                 cl::desc("Enable the pass that emits the linker"
+                          " optimization hints (LOH)"),
+                 cl::init(true));
+
+extern "C" void LLVMInitializeARM64Target() {
+  // Register the target.
+  RegisterTargetMachine<ARM64TargetMachine> X(TheARM64Target);
+}
+
+/// TargetMachine ctor - Create an ARM64 architecture model.
+///
+ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT,
+                                       StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+      Subtarget(TT, CPU, FS),
+      DL(Subtarget.isTargetMachO() ? "e-m:o-i64:64-i128:128-n32:64-S128"
+                                   : "e-m:e-i64:64-i128:128-n32:64-S128"),
+      InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
+      TSInfo(*this) {
+  initAsmInfo();
+}
+
+namespace {
+/// ARM64 Code Generator Pass Configuration Options.
+class ARM64PassConfig : public TargetPassConfig {
+public:
+  ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  ARM64TargetMachine &getARM64TargetMachine() const {
+    return getTM<ARM64TargetMachine>();
+  }
+
+  virtual bool addPreISel();
+  virtual bool addInstSelector();
+  virtual bool addILPOpts();
+  virtual bool addPreRegAlloc();
+  virtual bool addPostRegAlloc();
+  virtual bool addPreSched2();
+  virtual bool addPreEmitPass();
+};
+} // namespace
+
+void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // Add first the target-independent BasicTTI pass, then our ARM64 pass. This
+  // allows the ARM64 pass to delegate to the target independent layer when
+  // appropriate.
+  PM.add(createBasicTargetTransformInfoPass(this));
+  PM.add(createARM64TargetTransformInfoPass(this));
+}
+
+TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new ARM64PassConfig(this, PM);
+}
+
+// Pass Pipeline Configuration
+bool ARM64PassConfig::addPreISel() {
+  // Run promote constant before global merge, so that the promoted constants
+  // get a chance to be merged
+  if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
+    addPass(createARM64PromoteConstantPass());
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createGlobalMergePass(TM));
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createARM64AddressTypePromotionPass());
+  return false;
+}
+
+bool ARM64PassConfig::addInstSelector() {
+  addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel()));
+
+  // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
+  // references to _TLS_MODULE_BASE_ as possible.
+  if (TM->getSubtarget<ARM64Subtarget>().isTargetELF() &&
+      getOptLevel() != CodeGenOpt::None)
+    addPass(createARM64CleanupLocalDynamicTLSPass());
+
+  return false;
+}
+
+bool ARM64PassConfig::addILPOpts() {
+  if (EnableCCMP)
+    addPass(createARM64ConditionalCompares());
+  addPass(&EarlyIfConverterID);
+  if (EnableStPairSuppress)
+    addPass(createARM64StorePairSuppressPass());
+  return true;
+}
+
+bool ARM64PassConfig::addPreRegAlloc() {
+  // Use AdvSIMD scalar instructions whenever profitable.
+  addPass(createARM64AdvSIMDScalar());
+  return true;
+}
+
+bool ARM64PassConfig::addPostRegAlloc() {
+  // Change dead register definitions to refer to the zero register.
+  addPass(createARM64DeadRegisterDefinitions());
+  return true;
+}
+
+bool ARM64PassConfig::addPreSched2() {
+  // Expand some pseudo instructions to allow proper scheduling.
+  addPass(createARM64ExpandPseudoPass());
+  // Use load/store pair instructions when possible.
+  addPass(createARM64LoadStoreOptimizationPass());
+  return true;
+}
+
+bool ARM64PassConfig::addPreEmitPass() {
+  // Relax conditional branch instructions if they're otherwise out of
+  // range of their destination.
+  addPass(createARM64BranchRelaxation());
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH)
+    addPass(createARM64CollectLOHPass());
+  return true;
+}

Added: llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.h (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,69 @@
+//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM64TARGETMACHINE_H
+#define ARM64TARGETMACHINE_H
+
+#include "ARM64InstrInfo.h"
+#include "ARM64ISelLowering.h"
+#include "ARM64Subtarget.h"
+#include "ARM64FrameLowering.h"
+#include "ARM64SelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class ARM64TargetMachine : public LLVMTargetMachine {
+protected:
+  ARM64Subtarget Subtarget;
+
+private:
+  const DataLayout DL;
+  ARM64InstrInfo InstrInfo;
+  ARM64TargetLowering TLInfo;
+  ARM64FrameLowering FrameLowering;
+  ARM64SelectionDAGInfo TSInfo;
+
+public:
+  ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+                     const TargetOptions &Options, Reloc::Model RM,
+                     CodeModel::Model CM, CodeGenOpt::Level OL);
+
+  virtual const ARM64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+  virtual const ARM64TargetLowering *getTargetLowering() const {
+    return &TLInfo;
+  }
+  virtual const DataLayout *getDataLayout() const { return &DL; }
+  virtual const ARM64FrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
+  virtual const ARM64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+  virtual const ARM64RegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+  virtual const ARM64SelectionDAGInfo *getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
+  // Pass Pipeline Configuration
+  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+  /// \brief Register ARM64 analysis passes with a pass manager.
+  virtual void addAnalysisPasses(PassManagerBase &PM);
+};
+
+} // end namespace llvm
+
+#endif

Added: llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,52 @@
+//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM64TargetObjectFile.h"
+#include "ARM64TargetMachine.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+using namespace dwarf;
+
+void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
+                                           const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
+
+const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference(
+    const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
+    const TargetMachine &TM, MachineModuleInfo *MMI,
+    MCStreamer &Streamer) const {
+  // On Darwin, we can reference dwarf symbols with foo at GOT-., which
+  // is an indirect pc-relative reference. The default implementation
+  // won't reference using the GOT, so we need this target-specific
+  // version.
+  if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+    const MCSymbol *Sym = TM.getSymbol(GV, Mang);
+    const MCExpr *Res =
+        MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+    MCSymbol *PCSym = getContext().CreateTempSymbol();
+    Streamer.EmitLabel(PCSym);
+    const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+    return MCBinaryExpr::CreateSub(Res, PC, getContext());
+  }
+
+  return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
+      GV, Encoding, Mang, TM, MMI, Streamer);
+}
+
+MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol(
+    const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+    MachineModuleInfo *MMI) const {
+  return TM.getSymbol(GV, Mang);
+}

Added: llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.h (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64TargetObjectFile.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,40 @@
+//===-- ARM64TargetObjectFile.h - ARM64 Object Info -*- C++ -------------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+class ARM64TargetMachine;
+
+/// This implementation is used for AArch64 ELF targets (Linux in particular).
+class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+  virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+};
+
+/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
+class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+public:
+  const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+                                        unsigned Encoding, Mangler &Mang,
+                                        const TargetMachine &TM,
+                                        MachineModuleInfo *MMI,
+                                        MCStreamer &Streamer) const override;
+
+  MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
+                                    const TargetMachine &TM,
+                                    MachineModuleInfo *MMI) const override;
+};
+
+} // end namespace llvm
+
+#endif

Added: llvm/trunk/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64TargetTransformInfo.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64TargetTransformInfo.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/ARM64TargetTransformInfo.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,326 @@
+//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// ARM64 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm64tti"
+#include "ARM64.h"
+#include "ARM64TargetMachine.h"
+#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't havve a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeARM64TTIPass(PassRegistry &);
+}
+
+namespace {
+
+class ARM64TTI final : public ImmutablePass, public TargetTransformInfo {
+  const ARM64TargetMachine *TM;
+  const ARM64Subtarget *ST;
+  const ARM64TargetLowering *TLI;
+
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+  ARM64TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+    llvm_unreachable("This pass cannot be directly constructed");
+  }
+
+  ARM64TTI(const ARM64TargetMachine *TM)
+      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+        TLI(TM->getTargetLowering()) {
+    initializeARM64TTIPass(*PassRegistry::getPassRegistry());
+  }
+
+  void initializePass() override { pushTTIStack(this); }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    TargetTransformInfo::getAnalysisUsage(AU);
+  }
+
+  /// Pass identification.
+  static char ID;
+
+  /// Provide necessary pointer adjustments for the two base classes.
+  void *getAdjustedAnalysisPointer(const void *ID) override {
+    if (ID == &TargetTransformInfo::ID)
+      return (TargetTransformInfo *)this;
+    return this;
+  }
+
+  /// \name Scalar TTI Implementations
+  /// @{
+
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  unsigned getNumberOfRegisters(bool Vector) const override {
+    if (Vector)
+      return 32;
+
+    return 31;
+  }
+
+  unsigned getRegisterBitWidth(bool Vector) const override {
+    if (Vector)
+      return 128;
+
+    return 64;
+  }
+
+  unsigned getMaximumUnrollFactor() const override { return 2; }
+
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
+      override;
+
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
+      override;
+
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  OperandValueKind Opd1Info = OK_AnyValue,
+                                  OperandValueKind Opd2Info = OK_AnyValue) const
+      override;
+
+  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
+
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
+      override;
+
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                           unsigned AddressSpace) const override;
+  /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti",
+                   "ARM64 Target Transform Info", true, true, false)
+char ARM64TTI::ID = 0;
+
+ImmutablePass *
+llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) {
+  return new ARM64TTI(TM);
+}
+
+unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  if (BitSize == 0)
+    return ~0U;
+
+  int64_t Val = Imm.getSExtValue();
+  if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
+    return 1;
+
+  if ((int64_t)Val < 0)
+    Val = ~Val;
+  if (BitSize == 32)
+    Val &= (1LL << 32) - 1;
+
+  unsigned LZ = countLeadingZeros((uint64_t)Val);
+  unsigned Shift = (63 - LZ) / 16;
+  // MOVZ is free so return true for one or fewer MOVK.
+  return (Shift == 0) ? 1 : Shift;
+}
+
+ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  if (TyWidth == 32 || TyWidth == 64)
+    return PSK_FastHardware;
+  // TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount.
+  return PSK_Software;
+}
+
+unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                    Type *Src) const {
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  EVT SrcTy = TLI->getValueType(Src);
+  EVT DstTy = TLI->getValueType(Dst);
+
+  if (!SrcTy.isSimple() || !DstTy.isSimple())
+    return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+  static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
+    // LowerVectorINT_TO_FP:
+    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+    // LowerVectorFP_TO_INT
+    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
+    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
+    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
+    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
+    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
+    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
+  };
+
+  int Idx = ConvertCostTableLookup<MVT>(
+      ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
+      SrcTy.getSimpleVT());
+  if (Idx != -1)
+    return ConversionTbl[Idx].Cost;
+
+  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+                                      unsigned Index) const {
+  assert(Val->isVectorTy() && "This must be a vector type");
+
+  if (Index != -1U) {
+    // Legalize the type.
+    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+
+    // This type is legalized to a scalar type.
+    if (!LT.second.isVector())
+      return 0;
+
+    // The type may be split. Normalize the index to the new type.
+    unsigned Width = LT.second.getVectorNumElements();
+    Index = Index % Width;
+
+    // The element at index zero is already inside the vector.
+    if (Index == 0)
+      return 0;
+  }
+
+  // All other insert/extracts cost this much.
+  return 2;
+}
+
+unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind Opd1Info,
+                                          OperandValueKind Opd2Info) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+  switch (ISD) {
+  default:
+    return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
+                                                       Opd2Info);
+  case ISD::ADD:
+  case ISD::MUL:
+  case ISD::XOR:
+  case ISD::OR:
+  case ISD::AND:
+    // These nodes are marked as 'custom' for combining purposes only.
+    // We know that they are legal. See LowerAdd in ISelLowering.
+    return 1 * LT.first;
+  }
+}
+
+unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+  // Address computations in vectorized code with non-consecutive addresses will
+  // likely result in more instructions compared to scalar code where the
+  // computation can more often be merged into the index mode. The resulting
+  // extra micro-ops can significantly decrease throughput.
+  unsigned NumVectorInstToHideOverhead = 10;
+
+  if (Ty->isVectorTy() && IsComplex)
+    return NumVectorInstToHideOverhead;
+
+  // In many cases the address computation is not merged into the instruction
+  // addressing mode.
+  return 1;
+}
+
+unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                      Type *CondTy) const {
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  // We don't lower vector selects well that are wider than the register width.
+  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
+    // We would need this many instructions to hide the scalarization happening.
+    unsigned AmortizationCost = 20;
+    static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+    VectorSelectTbl[] = {
+      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
+      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
+      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
+      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
+      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
+      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
+    };
+
+    EVT SelCondTy = TLI->getValueType(CondTy);
+    EVT SelValTy = TLI->getValueType(ValTy);
+    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+      int Idx =
+          ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
+                                 SelValTy.getSimpleVT());
+      if (Idx != -1)
+        return VectorSelectTbl[Idx].Cost;
+    }
+  }
+  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                   unsigned Alignment,
+                                   unsigned AddressSpace) const {
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
+      Src->getVectorElementType()->isIntegerTy(64)) {
+    // Unaligned stores are extremely inefficient. We don't split
+    // unaligned v2i64 stores because the negative impact that has shown in
+    // practice on inlined memcpy code.
+    // We make v2i64 stores expensive so that we will only vectorize if there
+    // are 6 other instructions getting vectorized.
+    unsigned AmortizationCost = 6;
+
+    return LT.first * 2 * AmortizationCost;
+  }
+
+  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
+      Src->getVectorNumElements() < 8) {
+    // We scalarize the loads/stores because there is not v.4b register and we
+    // have to promote the elements to v.4h.
+    unsigned NumVecElts = Src->getVectorNumElements();
+    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
+    // We generate 2 instructions per vector element.
+    return NumVectorizableInstsToAmortize * NumVecElts * 2;
+  }
+
+  return LT.first;
+}

Added: llvm/trunk/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,4832 @@
+//===-- ARM64AsmParser.cpp - Parse ARM64 assembly to MCInst instructions --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "MCTargetDesc/ARM64BaseInfo.h"
+#include "MCTargetDesc/ARM64MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+
+class ARM64Operand;
+
+class ARM64AsmParser : public MCTargetAsmParser {
+public:
+  typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector;
+
+private:
+  StringRef Mnemonic; //< Instruction mnemonic.
+  MCSubtargetInfo &STI;
+  MCAsmParser &Parser;
+
+  MCAsmParser &getParser() const { return Parser; }
+  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+  SMLoc getLoc() const { return Parser.getTok().getLoc(); }
+
+  bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
+  unsigned parseCondCodeString(StringRef Cond);
+  bool parseCondCode(OperandVector &Operands, bool invertCondCode);
+  int tryParseRegister();
+  int tryMatchVectorRegister(StringRef &Kind);
+  bool parseOptionalShift(OperandVector &Operands);
+  bool parseOptionalExtend(OperandVector &Operands);
+  bool parseRegister(OperandVector &Operands);
+  bool parseMemory(OperandVector &Operands);
+  bool parseSymbolicImmVal(const MCExpr *&ImmVal);
+  bool parseVectorList(OperandVector &Operands);
+  bool parseOperand(OperandVector &Operands, bool isCondCode,
+                    bool invertCondCode);
+
+  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+  bool showMatchError(SMLoc Loc, unsigned ErrCode);
+
+  bool parseDirectiveWord(unsigned Size, SMLoc L);
+  bool parseDirectiveTLSDescCall(SMLoc L);
+
+  bool parseDirectiveLOH(StringRef LOH, SMLoc L);
+
+  bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               OperandVector &Operands, MCStreamer &Out,
+                               unsigned &ErrorInfo, bool MatchingInlineAsm);
+/// @name Auto-generated Match Functions
+/// {
+
+#define GET_ASSEMBLER_HEADER
+#include "ARM64GenAsmMatcher.inc"
+
+  /// }
+
+  OperandMatchResultTy tryParseNoIndexMemory(OperandVector &Operands);
+  OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
+  OperandMatchResultTy tryParseSystemRegister(OperandVector &Operands);
+  OperandMatchResultTy tryParseCPSRField(OperandVector &Operands);
+  OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
+  OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
+  OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
+  OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
+  OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
+  bool tryParseVectorRegister(OperandVector &Operands);
+
+public:
+  enum ARM64MatchResultTy {
+    Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "ARM64GenAsmMatcher.inc"
+  };
+  ARM64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+                 const MCInstrInfo &MII)
+      : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+    MCAsmParserExtension::Initialize(_Parser);
+  }
+
+  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                SMLoc NameLoc, OperandVector &Operands);
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+  virtual bool ParseDirective(AsmToken DirectiveID);
+  unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
+
+  static bool classifySymbolRef(const MCExpr *Expr,
+                                ARM64MCExpr::VariantKind &ELFRefKind,
+                                MCSymbolRefExpr::VariantKind &DarwinRefKind,
+                                const MCConstantExpr *&Addend);
+};
+} // end anonymous namespace
+
+namespace {
+
+/// ARM64Operand - Instances of this class represent a parsed ARM64 machine
+/// instruction.
+class ARM64Operand : public MCParsedAsmOperand {
+public:
+  enum MemIdxKindTy {
+    ImmediateOffset, // pre-indexed, no writeback
+    RegisterOffset   // register offset, with optional extend
+  };
+
+private:
+  enum KindTy {
+    k_Immediate,
+    k_Memory,
+    k_Register,
+    k_VectorList,
+    k_VectorIndex,
+    k_Token,
+    k_SysCR,
+    k_Prefetch,
+    k_Shifter,
+    k_Extend,
+    k_FPImm,
+    k_Barrier,
+    k_SystemRegister,
+    k_CPSRField
+  } Kind;
+
+  SMLoc StartLoc, EndLoc, OffsetLoc;
+
+  struct TokOp {
+    const char *Data;
+    unsigned Length;
+    bool IsSuffix; // Is the operand actually a suffix on the mnemonic.
+  };
+
+  struct RegOp {
+    unsigned RegNum;
+    bool isVector;
+  };
+
+  struct VectorListOp {
+    unsigned RegNum;
+    unsigned Count;
+    unsigned NumElements;
+    unsigned ElementKind;
+  };
+
+  struct VectorIndexOp {
+    unsigned Val;
+  };
+
+  struct ImmOp {
+    const MCExpr *Val;
+  };
+
+  struct FPImmOp {
+    unsigned Val; // Encoded 8-bit representation.
+  };
+
+  struct BarrierOp {
+    unsigned Val; // Not the enum since not all values have names.
+  };
+
+  struct SystemRegisterOp {
+    // 16-bit immediate, usually from the ARM64SYS::SystermRegister enum,
+    // but not limited to those values.
+    uint16_t Val;
+  };
+
+  struct CPSRFieldOp {
+    ARM64SYS::CPSRField Field;
+  };
+
+  struct SysCRImmOp {
+    unsigned Val;
+  };
+
+  struct PrefetchOp {
+    unsigned Val;
+  };
+
+  struct ShifterOp {
+    unsigned Val;
+  };
+
+  struct ExtendOp {
+    unsigned Val;
+  };
+
+  // This is for all forms of ARM64 address expressions
+  struct MemOp {
+    unsigned BaseRegNum, OffsetRegNum;
+    ARM64_AM::ExtendType ExtType;
+    unsigned ShiftVal;
+    bool ExplicitShift;
+    const MCExpr *OffsetImm;
+    MemIdxKindTy Mode;
+  };
+
+  union {
+    struct TokOp Tok;
+    struct RegOp Reg;
+    struct VectorListOp VectorList;
+    struct VectorIndexOp VectorIndex;
+    struct ImmOp Imm;
+    struct FPImmOp FPImm;
+    struct BarrierOp Barrier;
+    struct SystemRegisterOp SystemRegister;
+    struct CPSRFieldOp CPSRField;
+    struct SysCRImmOp SysCRImm;
+    struct PrefetchOp Prefetch;
+    struct ShifterOp Shifter;
+    struct ExtendOp Extend;
+    struct MemOp Mem;
+  };
+
+  // Keep the MCContext around as the MCExprs may need manipulated during
+  // the add<>Operands() calls.
+  MCContext &Ctx;
+
+  ARM64Operand(KindTy K, MCContext &_Ctx)
+      : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
+
+public:
+  ARM64Operand(const ARM64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
+    Kind = o.Kind;
+    StartLoc = o.StartLoc;
+    EndLoc = o.EndLoc;
+    switch (Kind) {
+    case k_Token:
+      Tok = o.Tok;
+      break;
+    case k_Immediate:
+      Imm = o.Imm;
+      break;
+    case k_FPImm:
+      FPImm = o.FPImm;
+      break;
+    case k_Barrier:
+      Barrier = o.Barrier;
+      break;
+    case k_SystemRegister:
+      SystemRegister = o.SystemRegister;
+      break;
+    case k_CPSRField:
+      CPSRField = o.CPSRField;
+      break;
+    case k_Register:
+      Reg = o.Reg;
+      break;
+    case k_VectorList:
+      VectorList = o.VectorList;
+      break;
+    case k_VectorIndex:
+      VectorIndex = o.VectorIndex;
+      break;
+    case k_SysCR:
+      SysCRImm = o.SysCRImm;
+      break;
+    case k_Prefetch:
+      Prefetch = o.Prefetch;
+      break;
+    case k_Memory:
+      Mem = o.Mem;
+      break;
+    case k_Shifter:
+      Shifter = o.Shifter;
+      break;
+    case k_Extend:
+      Extend = o.Extend;
+      break;
+    }
+  }
+
+  /// getStartLoc - Get the location of the first token of this operand.
+  SMLoc getStartLoc() const { return StartLoc; }
+  /// getEndLoc - Get the location of the last token of this operand.
+  SMLoc getEndLoc() const { return EndLoc; }
+  /// getOffsetLoc - Get the location of the offset of this memory operand.
+  SMLoc getOffsetLoc() const { return OffsetLoc; }
+
+  StringRef getToken() const {
+    assert(Kind == k_Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  bool isTokenSuffix() const {
+    assert(Kind == k_Token && "Invalid access!");
+    return Tok.IsSuffix;
+  }
+
+  const MCExpr *getImm() const {
+    assert(Kind == k_Immediate && "Invalid access!");
+    return Imm.Val;
+  }
+
+  unsigned getFPImm() const {
+    assert(Kind == k_FPImm && "Invalid access!");
+    return FPImm.Val;
+  }
+
+  unsigned getBarrier() const {
+    assert(Kind == k_Barrier && "Invalid access!");
+    return Barrier.Val;
+  }
+
+  uint16_t getSystemRegister() const {
+    assert(Kind == k_SystemRegister && "Invalid access!");
+    return SystemRegister.Val;
+  }
+
+  ARM64SYS::CPSRField getCPSRField() const {
+    assert(Kind == k_CPSRField && "Invalid access!");
+    return CPSRField.Field;
+  }
+
+  unsigned getReg() const {
+    assert(Kind == k_Register && "Invalid access!");
+    return Reg.RegNum;
+  }
+
+  unsigned getVectorListStart() const {
+    assert(Kind == k_VectorList && "Invalid access!");
+    return VectorList.RegNum;
+  }
+
+  unsigned getVectorListCount() const {
+    assert(Kind == k_VectorList && "Invalid access!");
+    return VectorList.Count;
+  }
+
+  unsigned getVectorIndex() const {
+    assert(Kind == k_VectorIndex && "Invalid access!");
+    return VectorIndex.Val;
+  }
+
+  unsigned getSysCR() const {
+    assert(Kind == k_SysCR && "Invalid access!");
+    return SysCRImm.Val;
+  }
+
+  unsigned getPrefetch() const {
+    assert(Kind == k_Prefetch && "Invalid access!");
+    return Prefetch.Val;
+  }
+
+  unsigned getShifter() const {
+    assert(Kind == k_Shifter && "Invalid access!");
+    return Shifter.Val;
+  }
+
+  unsigned getExtend() const {
+    assert(Kind == k_Extend && "Invalid access!");
+    return Extend.Val;
+  }
+
+  bool isImm() const { return Kind == k_Immediate; }
+  bool isSImm9() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= -256 && Val < 256);
+  }
+  bool isSImm7s4() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= -256 && Val <= 252 && (Val & 3) == 0);
+  }
+  bool isSImm7s8() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= -512 && Val <= 504 && (Val & 7) == 0);
+  }
+  bool isSImm7s16() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0);
+  }
+  bool isImm0_7() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 0 && Val < 8);
+  }
+  bool isImm1_8() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val > 0 && Val < 9);
+  }
+  bool isImm0_15() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 0 && Val < 16);
+  }
+  bool isImm1_16() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val > 0 && Val < 17);
+  }
+  bool isImm0_31() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 0 && Val < 32);
+  }
+  bool isImm1_31() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 1 && Val < 32);
+  }
+  bool isImm1_32() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 1 && Val < 33);
+  }
+  bool isImm0_63() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 0 && Val < 64);
+  }
+  bool isImm1_63() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 1 && Val < 64);
+  }
+  bool isImm1_64() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 1 && Val < 65);
+  }
+  bool isImm0_127() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 0 && Val < 128);
+  }
+  bool isImm0_255() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 0 && Val < 256);
+  }
+  bool isImm0_65535() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    int64_t Val = MCE->getValue();
+    return (Val >= 0 && Val < 65536);
+  }
+  bool isLogicalImm32() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    return ARM64_AM::isLogicalImmediate(MCE->getValue(), 32);
+  }
+  bool isLogicalImm64() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    return ARM64_AM::isLogicalImmediate(MCE->getValue(), 64);
+  }
+  bool isSIMDImmType10() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return false;
+    return ARM64_AM::isAdvSIMDModImmType10(MCE->getValue());
+  }
+  bool isBranchTarget26() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return true;
+    int64_t Val = MCE->getValue();
+    if (Val & 0x3)
+      return false;
+    return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
+  }
+  bool isBranchTarget19() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return true;
+    int64_t Val = MCE->getValue();
+    if (Val & 0x3)
+      return false;
+    return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
+  }
+  bool isBranchTarget14() const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return true;
+    int64_t Val = MCE->getValue();
+    if (Val & 0x3)
+      return false;
+    return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2));
+  }
+
+  bool isMovWSymbol(ArrayRef<ARM64MCExpr::VariantKind> AllowedModifiers) const {
+    if (!isImm())
+      return false;
+
+    ARM64MCExpr::VariantKind ELFRefKind;
+    MCSymbolRefExpr::VariantKind DarwinRefKind;
+    const MCConstantExpr *Addend;
+    if (!ARM64AsmParser::classifySymbolRef(getImm(), ELFRefKind, DarwinRefKind,
+                                           Addend)) {
+      return false;
+    }
+    if (DarwinRefKind != MCSymbolRefExpr::VK_None)
+      return false;
+
+    for (unsigned i = 0; i != AllowedModifiers.size(); ++i) {
+      if (ELFRefKind == AllowedModifiers[i])
+        return Addend == 0;
+    }
+
+    return false;
+  }
+
+  bool isMovZSymbolG3() const {
+    static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 };
+    return isMovWSymbol(Variants);
+  }
+
+  bool isMovZSymbolG2() const {
+    static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2,
+                                                   ARM64MCExpr::VK_TPREL_G2,
+                                                   ARM64MCExpr::VK_DTPREL_G2 };
+    return isMovWSymbol(Variants);
+  }
+
+  bool isMovZSymbolG1() const {
+    static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G1,
+                                                   ARM64MCExpr::VK_GOTTPREL_G1,
+                                                   ARM64MCExpr::VK_TPREL_G1,
+                                                   ARM64MCExpr::VK_DTPREL_G1, };
+    return isMovWSymbol(Variants);
+  }
+
+  bool isMovZSymbolG0() const {
+    static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G0,
+                                                   ARM64MCExpr::VK_TPREL_G0,
+                                                   ARM64MCExpr::VK_DTPREL_G0 };
+    return isMovWSymbol(Variants);
+  }
+
+  bool isMovKSymbolG2() const {
+    static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2_NC };
+    return isMovWSymbol(Variants);
+  }
+
+  bool isMovKSymbolG1() const {
+    static ARM64MCExpr::VariantKind Variants[] = {
+      ARM64MCExpr::VK_ABS_G1_NC, ARM64MCExpr::VK_TPREL_G1_NC,
+      ARM64MCExpr::VK_DTPREL_G1_NC
+    };
+    return isMovWSymbol(Variants);
+  }
+
+  bool isMovKSymbolG0() const {
+    static ARM64MCExpr::VariantKind Variants[] = {
+      ARM64MCExpr::VK_ABS_G0_NC,   ARM64MCExpr::VK_GOTTPREL_G0_NC,
+      ARM64MCExpr::VK_TPREL_G0_NC, ARM64MCExpr::VK_DTPREL_G0_NC
+    };
+    return isMovWSymbol(Variants);
+  }
+
+  bool isFPImm() const { return Kind == k_FPImm; }
+  bool isBarrier() const { return Kind == k_Barrier; }
+  bool isSystemRegister() const {
+    if (Kind == k_SystemRegister)
+      return true;
+    // SPSel is legal for both the system register and the CPSR-field
+    // variants of MSR, so special case that. Fugly.
+    return (Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel);
+  }
+  bool isSystemCPSRField() const { return Kind == k_CPSRField; }
+  bool isReg() const { return Kind == k_Register && !Reg.isVector; }
+  bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
+
+  /// Is this a vector list with the type implicit (presumably attached to the
+  /// instruction itself)?
+  template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const {
+    return Kind == k_VectorList && VectorList.Count == NumRegs &&
+           !VectorList.ElementKind;
+  }
+
+  template <unsigned NumRegs, unsigned NumElements, char ElementKind>
+  bool isTypedVectorList() const {
+    if (Kind != k_VectorList)
+      return false;
+    if (VectorList.Count != NumRegs)
+      return false;
+    if (VectorList.ElementKind != ElementKind)
+      return false;
+    return VectorList.NumElements == NumElements;
+  }
+
+  bool isVectorIndexB() const {
+    return Kind == k_VectorIndex && VectorIndex.Val < 16;
+  }
+  bool isVectorIndexH() const {
+    return Kind == k_VectorIndex && VectorIndex.Val < 8;
+  }
+  bool isVectorIndexS() const {
+    return Kind == k_VectorIndex && VectorIndex.Val < 4;
+  }
+  bool isVectorIndexD() const {
+    return Kind == k_VectorIndex && VectorIndex.Val < 2;
+  }
+  bool isToken() const { return Kind == k_Token; }
+  bool isTokenEqual(StringRef Str) const {
+    return Kind == k_Token && getToken() == Str;
+  }
+  bool isMem() const { return Kind == k_Memory; }
+  bool isSysCR() const { return Kind == k_SysCR; }
+  bool isPrefetch() const { return Kind == k_Prefetch; }
+  bool isShifter() const { return Kind == k_Shifter; }
+  bool isExtend() const {
+    // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand.
+    if (isShifter()) {
+      ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
+      return ST == ARM64_AM::LSL;
+    }
+    return Kind == k_Extend;
+  }
+  bool isExtend64() const {
+    if (Kind != k_Extend)
+      return false;
+    // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class).
+    ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val);
+    return ET != ARM64_AM::UXTX && ET != ARM64_AM::SXTX;
+  }
+  bool isExtendLSL64() const {
+    // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand.
+    if (isShifter()) {
+      ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
+      return ST == ARM64_AM::LSL;
+    }
+    if (Kind != k_Extend)
+      return false;
+    ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val);
+    return ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX;
+  }
+
+  bool isArithmeticShifter() const {
+    if (!isShifter())
+      return false;
+
+    // An arithmetic shifter is LSL, LSR, or ASR.
+    ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
+    return ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR;
+  }
+
+  bool isMovImm32Shifter() const {
+    if (!isShifter())
+      return false;
+
+    // A MOVi shifter is LSL of 0, 16, 32, or 48.
+    ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
+    if (ST != ARM64_AM::LSL)
+      return false;
+    uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val);
+    return (Val == 0 || Val == 16);
+  }
+
+  bool isMovImm64Shifter() const {
+    if (!isShifter())
+      return false;
+
+    // A MOVi shifter is LSL of 0 or 16.
+    ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
+    if (ST != ARM64_AM::LSL)
+      return false;
+    uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val);
+    return (Val == 0 || Val == 16 || Val == 32 || Val == 48);
+  }
+
+  bool isAddSubShifter() const {
+    if (!isShifter())
+      return false;
+
+    // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'.
+    unsigned Val = Shifter.Val;
+    return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
+           (ARM64_AM::getShiftValue(Val) == 0 ||
+            ARM64_AM::getShiftValue(Val) == 12);
+  }
+
+  bool isLogicalVecShifter() const {
+    if (!isShifter())
+      return false;
+
+    // A logical vector shifter is a left shift by 0, 8, 16, or 24.
+    unsigned Val = Shifter.Val;
+    unsigned Shift = ARM64_AM::getShiftValue(Val);
+    return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
+           (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24);
+  }
+
+  bool isLogicalVecHalfWordShifter() const {
+    if (!isLogicalVecShifter())
+      return false;
+
+    // A logical vector shifter is a left shift by 0 or 8.
+    unsigned Val = Shifter.Val;
+    unsigned Shift = ARM64_AM::getShiftValue(Val);
+    return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
+           (Shift == 0 || Shift == 8);
+  }
+
+  bool isMoveVecShifter() const {
+    if (!isShifter())
+      return false;
+
+    // A logical vector shifter is a left shift by 8 or 16.
+    unsigned Val = Shifter.Val;
+    unsigned Shift = ARM64_AM::getShiftValue(Val);
+    return ARM64_AM::getShiftType(Val) == ARM64_AM::MSL &&
+           (Shift == 8 || Shift == 16);
+  }
+
+  bool isMemoryRegisterOffset8() const {
+    return isMem() && Mem.Mode == RegisterOffset && Mem.ShiftVal == 0;
+  }
+
+  bool isMemoryRegisterOffset16() const {
+    return isMem() && Mem.Mode == RegisterOffset &&
+           (Mem.ShiftVal == 0 || Mem.ShiftVal == 1);
+  }
+
+  bool isMemoryRegisterOffset32() const {
+    return isMem() && Mem.Mode == RegisterOffset &&
+           (Mem.ShiftVal == 0 || Mem.ShiftVal == 2);
+  }
+
+  bool isMemoryRegisterOffset64() const {
+    return isMem() && Mem.Mode == RegisterOffset &&
+           (Mem.ShiftVal == 0 || Mem.ShiftVal == 3);
+  }
+
+  bool isMemoryRegisterOffset128() const {
+    return isMem() && Mem.Mode == RegisterOffset &&
+           (Mem.ShiftVal == 0 || Mem.ShiftVal == 4);
+  }
+
+  bool isMemoryUnscaled() const {
+    if (!isMem())
+      return false;
+    if (Mem.Mode != ImmediateOffset)
+      return false;
+    if (!Mem.OffsetImm)
+      return true;
+    // Make sure the immediate value is valid.
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+    if (!CE)
+      return false;
+    // The offset must fit in a signed 9-bit unscaled immediate.
+    int64_t Value = CE->getValue();
+    return (Value >= -256 && Value < 256);
+  }
+  // Fallback unscaled operands are for aliases of LDR/STR that fall back
+  // to LDUR/STUR when the offset is not legal for the former but is for
+  // the latter. As such, in addition to checking for being a legal unscaled
+  // address, also check that it is not a legal scaled address. This avoids
+  // ambiguity in the matcher.
+  bool isMemoryUnscaledFB8() const {
+    return isMemoryUnscaled() && !isMemoryIndexed8();
+  }
+  bool isMemoryUnscaledFB16() const {
+    return isMemoryUnscaled() && !isMemoryIndexed16();
+  }
+  bool isMemoryUnscaledFB32() const {
+    return isMemoryUnscaled() && !isMemoryIndexed32();
+  }
+  bool isMemoryUnscaledFB64() const {
+    return isMemoryUnscaled() && !isMemoryIndexed64();
+  }
+  bool isMemoryUnscaledFB128() const {
+    return isMemoryUnscaled() && !isMemoryIndexed128();
+  }
+  bool isMemoryIndexed(unsigned Scale) const {
+    if (!isMem())
+      return false;
+    if (Mem.Mode != ImmediateOffset)
+      return false;
+    if (!Mem.OffsetImm)
+      return true;
+    // Make sure the immediate value is valid.
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+
+    if (CE) {
+      // The offset must be a positive multiple of the scale and in range of
+      // encoding with a 12-bit immediate.
+      int64_t Value = CE->getValue();
+      return (Value >= 0 && (Value % Scale) == 0 && Value <= (4095 * Scale));
+    }
+
+    // If it's not a constant, check for some expressions we know.
+    const MCExpr *Expr = Mem.OffsetImm;
+    ARM64MCExpr::VariantKind ELFRefKind;
+    MCSymbolRefExpr::VariantKind DarwinRefKind;
+    const MCConstantExpr *Addend;
+    if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind,
+                                           Addend)) {
+      // If we don't understand the expression, assume the best and
+      // let the fixup and relocation code deal with it.
+      return true;
+    }
+
+    if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
+        ELFRefKind == ARM64MCExpr::VK_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_GOT_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC ||
+        ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC ||
+        ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC ||
+        ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) {
+      // Note that we don't range-check the addend. It's adjusted modulo page
+      // size when converted, so there is no "out of range" condition when using
+      // @pageoff.
+      int64_t Value = Addend ? Addend->getValue() : 0;
+      return Value >= 0 && (Value % Scale) == 0;
+    } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF ||
+               DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) {
+      // @gotpageoff/@tlvppageoff can only be used directly, not with an addend.
+      return Addend == 0;
+    }
+
+    return false;
+  }
+  bool isMemoryIndexed128() const { return isMemoryIndexed(16); }
+  bool isMemoryIndexed64() const { return isMemoryIndexed(8); }
+  bool isMemoryIndexed32() const { return isMemoryIndexed(4); }
+  bool isMemoryIndexed16() const { return isMemoryIndexed(2); }
+  bool isMemoryIndexed8() const { return isMemoryIndexed(1); }
+  bool isMemoryNoIndex() const {
+    if (!isMem())
+      return false;
+    if (Mem.Mode != ImmediateOffset)
+      return false;
+    if (!Mem.OffsetImm)
+      return true;
+
+    // Make sure the immediate value is valid. Only zero is allowed.
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+    if (!CE || CE->getValue() != 0)
+      return false;
+    return true;
+  }
+  bool isMemorySIMDNoIndex() const {
+    if (!isMem())
+      return false;
+    if (Mem.Mode != ImmediateOffset)
+      return false;
+    return Mem.OffsetImm == 0;
+  }
+  bool isMemoryIndexedSImm9() const {
+    if (!isMem() || Mem.Mode != ImmediateOffset)
+      return false;
+    if (!Mem.OffsetImm)
+      return true;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+    assert(CE && "Non-constant pre-indexed offset!");
+    int64_t Value = CE->getValue();
+    return Value >= -256 && Value <= 255;
+  }
+  bool isMemoryIndexed32SImm7() const {
+    if (!isMem() || Mem.Mode != ImmediateOffset)
+      return false;
+    if (!Mem.OffsetImm)
+      return true;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+    assert(CE && "Non-constant pre-indexed offset!");
+    int64_t Value = CE->getValue();
+    return ((Value % 4) == 0) && Value >= -256 && Value <= 252;
+  }
+  bool isMemoryIndexed64SImm7() const {
+    if (!isMem() || Mem.Mode != ImmediateOffset)
+      return false;
+    if (!Mem.OffsetImm)
+      return true;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+    assert(CE && "Non-constant pre-indexed offset!");
+    int64_t Value = CE->getValue();
+    return ((Value % 8) == 0) && Value >= -512 && Value <= 504;
+  }
+  bool isMemoryIndexed128SImm7() const {
+    if (!isMem() || Mem.Mode != ImmediateOffset)
+      return false;
+    if (!Mem.OffsetImm)
+      return true;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+    assert(CE && "Non-constant pre-indexed offset!");
+    int64_t Value = CE->getValue();
+    return ((Value % 16) == 0) && Value >= -1024 && Value <= 1008;
+  }
+
+  bool isAdrpLabel() const {
+    // Validation was handled during parsing, so we just sanity check that
+    // something didn't go haywire.
+    return isImm();
+  }
+
+  bool isAdrLabel() const {
+    // Validation was handled during parsing, so we just sanity check that
+    // something didn't go haywire.
+    return isImm();
+  }
+
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as immediates when possible.  Null MCExpr = 0.
+    if (Expr == 0)
+      Inst.addOperand(MCOperand::CreateImm(0));
+    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(Expr));
+  }
+
+  void addRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getReg()));
+  }
+
+  void addVectorRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getReg()));
+  }
+
+  template <unsigned NumRegs>
+  void addVectorList64Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    static unsigned FirstRegs[] = { ARM64::D0,       ARM64::D0_D1,
+                                    ARM64::D0_D1_D2, ARM64::D0_D1_D2_D3 };
+    unsigned FirstReg = FirstRegs[NumRegs - 1];
+
+    Inst.addOperand(
+        MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0));
+  }
+
+  template <unsigned NumRegs>
+  void addVectorList128Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    static unsigned FirstRegs[] = { ARM64::Q0,       ARM64::Q0_Q1,
+                                    ARM64::Q0_Q1_Q2, ARM64::Q0_Q1_Q2_Q3 };
+    unsigned FirstReg = FirstRegs[NumRegs - 1];
+
+    Inst.addOperand(
+        MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0));
+  }
+
+  void addVectorIndexBOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+  }
+
+  void addVectorIndexHOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+  }
+
+  void addVectorIndexSOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+  }
+
+  void addVectorIndexDOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+  }
+
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // If this is a pageoff symrefexpr with an addend, adjust the addend
+    // to be only the page-offset portion. Otherwise, just add the expr
+    // as-is.
+    addExpr(Inst, getImm());
+  }
+
+  void addAdrpLabelOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+
+  void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+
+  void addSImm9Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addSImm7s4Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4));
+  }
+
+  void addSImm7s8Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8));
+  }
+
+  void addSImm7s16Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16));
+  }
+
+  void addImm0_7Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm1_8Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm0_15Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm1_16Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm1_31Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm1_32Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm0_63Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm1_63Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm1_64Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm0_127Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm0_255Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+  }
+
+  void addLogicalImm32Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid logical immediate operand!");
+    uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
+    Inst.addOperand(MCOperand::CreateImm(encoding));
+  }
+
+  void addLogicalImm64Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid logical immediate operand!");
+    uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 64);
+    Inst.addOperand(MCOperand::CreateImm(encoding));
+  }
+
+  void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    assert(MCE && "Invalid immediate operand!");
+    uint64_t encoding = ARM64_AM::encodeAdvSIMDModImmType10(MCE->getValue());
+    Inst.addOperand(MCOperand::CreateImm(encoding));
+  }
+
+  void addBranchTarget26Operands(MCInst &Inst, unsigned N) const {
+    // Branch operands don't encode the low bits, so shift them off
+    // here. If it's a label, however, just put it on directly as there's
+    // not enough information now to do anything.
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE) {
+      addExpr(Inst, getImm());
+      return;
+    }
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+  }
+
+  void addBranchTarget19Operands(MCInst &Inst, unsigned N) const {
+    // Branch operands don't encode the low bits, so shift them off
+    // here. If it's a label, however, just put it on directly as there's
+    // not enough information now to do anything.
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE) {
+      addExpr(Inst, getImm());
+      return;
+    }
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+  }
+
+  void addBranchTarget14Operands(MCInst &Inst, unsigned N) const {
+    // Branch operands don't encode the low bits, so shift them off
+    // here. If it's a label, however, just put it on directly as there's
+    // not enough information now to do anything.
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE) {
+      addExpr(Inst, getImm());
+      return;
+    }
+    assert(MCE && "Invalid constant immediate operand!");
+    Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+  }
+
+  void addFPImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+  }
+
+  void addBarrierOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getBarrier()));
+  }
+
+  void addSystemRegisterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    if (Kind == k_SystemRegister)
+      Inst.addOperand(MCOperand::CreateImm(getSystemRegister()));
+    else {
+      assert(Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel);
+      Inst.addOperand(MCOperand::CreateImm(ARM64SYS::SPSel));
+    }
+  }
+
+  void addSystemCPSRFieldOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getCPSRField()));
+  }
+
+  void addSysCROperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getSysCR()));
+  }
+
+  void addPrefetchOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getPrefetch()));
+  }
+
+  void addShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addArithmeticShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addMovImm32ShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addMovImm64ShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addAddSubShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addLogicalVecShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addLogicalVecHalfWordShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addMoveVecShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getShifter()));
+  }
+
+  void addExtendOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand.
+    if (isShifter()) {
+      assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL);
+      unsigned imm = getArithExtendImm(ARM64_AM::UXTX,
+                                       ARM64_AM::getShiftValue(getShifter()));
+      Inst.addOperand(MCOperand::CreateImm(imm));
+    } else
+      Inst.addOperand(MCOperand::CreateImm(getExtend()));
+  }
+
+  void addExtend64Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getExtend()));
+  }
+
+  void addExtendLSL64Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand.
+    if (isShifter()) {
+      assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL);
+      unsigned imm = getArithExtendImm(ARM64_AM::UXTX,
+                                       ARM64_AM::getShiftValue(getShifter()));
+      Inst.addOperand(MCOperand::CreateImm(imm));
+    } else
+      Inst.addOperand(MCOperand::CreateImm(getExtend()));
+  }
+
+  void addMemoryRegisterOffsetOperands(MCInst &Inst, unsigned N, bool DoShift) {
+    assert(N == 3 && "Invalid number of operands!");
+
+    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
+    Inst.addOperand(MCOperand::CreateReg(Mem.OffsetRegNum));
+    unsigned ExtendImm = ARM64_AM::getMemExtendImm(Mem.ExtType, DoShift);
+    Inst.addOperand(MCOperand::CreateImm(ExtendImm));
+  }
+
+  void addMemoryRegisterOffset8Operands(MCInst &Inst, unsigned N) {
+    addMemoryRegisterOffsetOperands(Inst, N, Mem.ExplicitShift);
+  }
+
+  void addMemoryRegisterOffset16Operands(MCInst &Inst, unsigned N) {
+    addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 1);
+  }
+
+  void addMemoryRegisterOffset32Operands(MCInst &Inst, unsigned N) {
+    addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 2);
+  }
+
+  void addMemoryRegisterOffset64Operands(MCInst &Inst, unsigned N) {
+    addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 3);
+  }
+
+  void addMemoryRegisterOffset128Operands(MCInst &Inst, unsigned N) {
+    addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 4);
+  }
+
+  void addMemoryIndexedOperands(MCInst &Inst, unsigned N,
+                                unsigned Scale) const {
+    // Add the base register operand.
+    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
+
+    if (!Mem.OffsetImm) {
+      // There isn't an offset.
+      Inst.addOperand(MCOperand::CreateImm(0));
+      return;
+    }
+
+    // Add the offset operand.
+    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm)) {
+      assert(CE->getValue() % Scale == 0 &&
+             "Offset operand must be multiple of the scale!");
+
+      // The MCInst offset operand doesn't include the low bits (like the
+      // instruction encoding).
+      Inst.addOperand(MCOperand::CreateImm(CE->getValue() / Scale));
+    }
+
+    // If this is a pageoff symrefexpr with an addend, the linker will
+    // do the scaling of the addend.
+    //
+    // Otherwise we don't know what this is, so just add the scaling divide to
+    // the expression and let the MC fixup evaluation code deal with it.
+    const MCExpr *Expr = Mem.OffsetImm;
+    ARM64MCExpr::VariantKind ELFRefKind;
+    MCSymbolRefExpr::VariantKind DarwinRefKind;
+    const MCConstantExpr *Addend;
+    if (Scale > 1 &&
+        (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind,
+                                            Addend) ||
+         (Addend != 0 && DarwinRefKind != MCSymbolRefExpr::VK_PAGEOFF))) {
+      Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(Scale, Ctx),
+                                     Ctx);
+    }
+
+    Inst.addOperand(MCOperand::CreateExpr(Expr));
+  }
+
+  void addMemoryUnscaledOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemoryUnscaled() && "Invalid number of operands!");
+    // Add the base register operand.
+    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
+
+    // Add the offset operand.
+    if (!Mem.OffsetImm)
+      Inst.addOperand(MCOperand::CreateImm(0));
+    else {
+      // Only constant offsets supported.
+      const MCConstantExpr *CE = cast<MCConstantExpr>(Mem.OffsetImm);
+      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+    }
+  }
+
+  void addMemoryIndexed128Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemoryIndexed128() && "Invalid number of operands!");
+    addMemoryIndexedOperands(Inst, N, 16);
+  }
+
+  void addMemoryIndexed64Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemoryIndexed64() && "Invalid number of operands!");
+    addMemoryIndexedOperands(Inst, N, 8);
+  }
+
+  void addMemoryIndexed32Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemoryIndexed32() && "Invalid number of operands!");
+    addMemoryIndexedOperands(Inst, N, 4);
+  }
+
+  void addMemoryIndexed16Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemoryIndexed16() && "Invalid number of operands!");
+    addMemoryIndexedOperands(Inst, N, 2);
+  }
+
+  void addMemoryIndexed8Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemoryIndexed8() && "Invalid number of operands!");
+    addMemoryIndexedOperands(Inst, N, 1);
+  }
+
+  void addMemoryNoIndexOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && isMemoryNoIndex() && "Invalid number of operands!");
+    // Add the base register operand (the offset is always zero, so ignore it).
+    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
+  }
+
+  void addMemorySIMDNoIndexOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && isMemorySIMDNoIndex() && "Invalid number of operands!");
+    // Add the base register operand (the offset is always zero, so ignore it).
+    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
+  }
+
+  void addMemoryWritebackIndexedOperands(MCInst &Inst, unsigned N,
+                                         unsigned Scale) const {
+    assert(N == 2 && "Invalid number of operands!");
+
+    // Add the base register operand.
+    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
+
+    // Add the offset operand.
+    int64_t Offset = 0;
+    if (Mem.OffsetImm) {
+      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
+      assert(CE && "Non-constant indexed offset operand!");
+      Offset = CE->getValue();
+    }
+
+    if (Scale != 1) {
+      assert(Offset % Scale == 0 &&
+             "Offset operand must be a multiple of the scale!");
+      Offset /= Scale;
+    }
+
+    Inst.addOperand(MCOperand::CreateImm(Offset));
+  }
+
+  void addMemoryIndexedSImm9Operands(MCInst &Inst, unsigned N) const {
+    addMemoryWritebackIndexedOperands(Inst, N, 1);
+  }
+
+  void addMemoryIndexed32SImm7Operands(MCInst &Inst, unsigned N) const {
+    addMemoryWritebackIndexedOperands(Inst, N, 4);
+  }
+
+  void addMemoryIndexed64SImm7Operands(MCInst &Inst, unsigned N) const {
+    addMemoryWritebackIndexedOperands(Inst, N, 8);
+  }
+
+  void addMemoryIndexed128SImm7Operands(MCInst &Inst, unsigned N) const {
+    addMemoryWritebackIndexedOperands(Inst, N, 16);
+  }
+
+  virtual void print(raw_ostream &OS) const;
+
+  static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S,
+                                   MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Token, Ctx);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->Tok.IsSuffix = IsSuffix;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARM64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S,
+                                 SMLoc E, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Register, Ctx);
+    Op->Reg.RegNum = RegNum;
+    Op->Reg.isVector = isVector;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
+                                        unsigned NumElements, char ElementKind,
+                                        SMLoc S, SMLoc E, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_VectorList, Ctx);
+    Op->VectorList.RegNum = RegNum;
+    Op->VectorList.Count = Count;
+    Op->VectorList.NumElements = NumElements;
+    Op->VectorList.ElementKind = ElementKind;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
+                                         MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_VectorIndex, Ctx);
+    Op->VectorIndex.Val = Idx;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
+                                 MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Immediate, Ctx);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_FPImm, Ctx);
+    Op->FPImm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARM64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Barrier, Ctx);
+    Op->Barrier.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARM64Operand *CreateSystemRegister(uint16_t Val, SMLoc S,
+                                            MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_SystemRegister, Ctx);
+    Op->SystemRegister.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARM64Operand *CreateCPSRField(ARM64SYS::CPSRField Field, SMLoc S,
+                                       MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_CPSRField, Ctx);
+    Op->CPSRField.Field = Field;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARM64Operand *CreateMem(unsigned BaseRegNum, const MCExpr *Off,
+                                 SMLoc S, SMLoc E, SMLoc OffsetLoc,
+                                 MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx);
+    Op->Mem.BaseRegNum = BaseRegNum;
+    Op->Mem.OffsetRegNum = 0;
+    Op->Mem.OffsetImm = Off;
+    Op->Mem.ExtType = ARM64_AM::UXTX;
+    Op->Mem.ShiftVal = 0;
+    Op->Mem.ExplicitShift = false;
+    Op->Mem.Mode = ImmediateOffset;
+    Op->OffsetLoc = OffsetLoc;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreateRegOffsetMem(unsigned BaseReg, unsigned OffsetReg,
+                                          ARM64_AM::ExtendType ExtType,
+                                          unsigned ShiftVal, bool ExplicitShift,
+                                          SMLoc S, SMLoc E, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx);
+    Op->Mem.BaseRegNum = BaseReg;
+    Op->Mem.OffsetRegNum = OffsetReg;
+    Op->Mem.OffsetImm = 0;
+    Op->Mem.ExtType = ExtType;
+    Op->Mem.ShiftVal = ShiftVal;
+    Op->Mem.ExplicitShift = ExplicitShift;
+    Op->Mem.Mode = RegisterOffset;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
+                                   MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx);
+    Op->SysCRImm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Prefetch, Ctx);
+    Op->Prefetch.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARM64Operand *CreateShifter(ARM64_AM::ShiftType ShOp, unsigned Val,
+                                     SMLoc S, SMLoc E, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Shifter, Ctx);
+    Op->Shifter.Val = ARM64_AM::getShifterImm(ShOp, Val);
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARM64Operand *CreateExtend(ARM64_AM::ExtendType ExtOp, unsigned Val,
+                                    SMLoc S, SMLoc E, MCContext &Ctx) {
+    ARM64Operand *Op = new ARM64Operand(k_Extend, Ctx);
+    Op->Extend.Val = ARM64_AM::getArithExtendImm(ExtOp, Val);
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+};
+
+} // end anonymous namespace.
+
+void ARM64Operand::print(raw_ostream &OS) const {
+  switch (Kind) {
+  case k_FPImm:
+    OS << "<fpimm " << getFPImm() << "(" << ARM64_AM::getFPImmFloat(getFPImm())
+       << ") >";
+    break;
+  case k_Barrier: {
+    const char *Name =
+        ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)getBarrier());
+    OS << "<barrier ";
+    if (Name)
+      OS << Name;
+    else
+      OS << getBarrier();
+    OS << ">";
+    break;
+  }
+  case k_SystemRegister: {
+    const char *Name = ARM64SYS::getSystemRegisterName(
+        (ARM64SYS::SystemRegister)getSystemRegister());
+    OS << "<systemreg ";
+    if (Name)
+      OS << Name;
+    else
+      OS << "#" << getSystemRegister();
+    OS << ">";
+    break;
+  }
+  case k_CPSRField: {
+    const char *Name = ARM64SYS::getCPSRFieldName(getCPSRField());
+    OS << "<cpsrfield " << Name << ">";
+    break;
+  }
+  case k_Immediate:
+    getImm()->print(OS);
+    break;
+  case k_Memory:
+    OS << "<memory>";
+    break;
+  case k_Register:
+    OS << "<register " << getReg() << ">";
+    break;
+  case k_VectorList: {
+    OS << "<vectorlist ";
+    unsigned Reg = getVectorListStart();
+    for (unsigned i = 0, e = getVectorListCount(); i != e; ++i)
+      OS << Reg + i << " ";
+    OS << ">";
+    break;
+  }
+  case k_VectorIndex:
+    OS << "<vectorindex " << getVectorIndex() << ">";
+    break;
+  case k_Token:
+    OS << "'" << getToken() << "'";
+    break;
+  case k_SysCR:
+    OS << "c" << getSysCR();
+    break;
+  case k_Prefetch:
+    OS << "<prfop ";
+    if (ARM64_AM::isNamedPrefetchOp(getPrefetch()))
+      OS << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)getPrefetch());
+    else
+      OS << "#" << getPrefetch();
+    OS << ">";
+    break;
+  case k_Shifter: {
+    unsigned Val = getShifter();
+    OS << "<" << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #"
+       << ARM64_AM::getShiftValue(Val) << ">";
+    break;
+  }
+  case k_Extend: {
+    unsigned Val = getExtend();
+    OS << "<" << ARM64_AM::getExtendName(ARM64_AM::getArithExtendType(Val))
+       << " #" << ARM64_AM::getArithShiftValue(Val) << ">";
+    break;
+  }
+  }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+static unsigned matchVectorRegName(StringRef Name) {
+  return StringSwitch<unsigned>(Name)
+      .Case("v0", ARM64::Q0)
+      .Case("v1", ARM64::Q1)
+      .Case("v2", ARM64::Q2)
+      .Case("v3", ARM64::Q3)
+      .Case("v4", ARM64::Q4)
+      .Case("v5", ARM64::Q5)
+      .Case("v6", ARM64::Q6)
+      .Case("v7", ARM64::Q7)
+      .Case("v8", ARM64::Q8)
+      .Case("v9", ARM64::Q9)
+      .Case("v10", ARM64::Q10)
+      .Case("v11", ARM64::Q11)
+      .Case("v12", ARM64::Q12)
+      .Case("v13", ARM64::Q13)
+      .Case("v14", ARM64::Q14)
+      .Case("v15", ARM64::Q15)
+      .Case("v16", ARM64::Q16)
+      .Case("v17", ARM64::Q17)
+      .Case("v18", ARM64::Q18)
+      .Case("v19", ARM64::Q19)
+      .Case("v20", ARM64::Q20)
+      .Case("v21", ARM64::Q21)
+      .Case("v22", ARM64::Q22)
+      .Case("v23", ARM64::Q23)
+      .Case("v24", ARM64::Q24)
+      .Case("v25", ARM64::Q25)
+      .Case("v26", ARM64::Q26)
+      .Case("v27", ARM64::Q27)
+      .Case("v28", ARM64::Q28)
+      .Case("v29", ARM64::Q29)
+      .Case("v30", ARM64::Q30)
+      .Case("v31", ARM64::Q31)
+      .Default(0);
+}
+
+static bool isValidVectorKind(StringRef Name) {
+  return StringSwitch<bool>(Name.lower())
+      .Case(".8b", true)
+      .Case(".16b", true)
+      .Case(".4h", true)
+      .Case(".8h", true)
+      .Case(".2s", true)
+      .Case(".4s", true)
+      .Case(".1d", true)
+      .Case(".2d", true)
+      .Case(".1q", true)
+      // Accept the width neutral ones, too, for verbose syntax. If those
+      // aren't used in the right places, the token operand won't match so
+      // all will work out.
+      .Case(".b", true)
+      .Case(".h", true)
+      .Case(".s", true)
+      .Case(".d", true)
+      .Default(false);
+}
+
+static void parseValidVectorKind(StringRef Name, unsigned &NumElements,
+                                 char &ElementKind) {
+  assert(isValidVectorKind(Name));
+
+  ElementKind = Name.lower()[Name.size() - 1];
+  NumElements = 0;
+
+  if (Name.size() == 2)
+    return;
+
+  // Parse the lane count
+  Name = Name.drop_front();
+  while (isdigit(Name.front())) {
+    NumElements = 10 * NumElements + (Name.front() - '0');
+    Name = Name.drop_front();
+  }
+}
+
+bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                   SMLoc &EndLoc) {
+  StartLoc = getLoc();
+  RegNo = tryParseRegister();
+  EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+  return (RegNo == (unsigned)-1);
+}
+
+/// tryParseRegister - Try to parse a register name. The token must be an
+/// Identifier when called, and if it is a register name the token is eaten and
+/// the register is added to the operand list.
+int ARM64AsmParser::tryParseRegister() {
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  std::string lowerCase = Tok.getString().lower();
+  unsigned RegNum = MatchRegisterName(lowerCase);
+  // Also handle a few aliases of registers.
+  if (RegNum == 0)
+    RegNum = StringSwitch<unsigned>(lowerCase)
+                 .Case("x29", ARM64::FP)
+                 .Case("x30", ARM64::LR)
+                 .Case("x31", ARM64::XZR)
+                 .Case("w31", ARM64::WZR)
+                 .Default(0);
+
+  if (RegNum == 0)
+    return -1;
+
+  Parser.Lex(); // Eat identifier token.
+  return RegNum;
+}
+
+/// tryMatchVectorRegister - Try to parse a vector register name with optional
+/// kind specifier. If it is a register specifier, eat the token and return it.
+int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind) {
+  if (Parser.getTok().isNot(AsmToken::Identifier)) {
+    TokError("vector register expected");
+    return -1;
+  }
+
+  StringRef Name = Parser.getTok().getString();
+  // If there is a kind specifier, it's separated from the register name by
+  // a '.'.
+  size_t Start = 0, Next = Name.find('.');
+  StringRef Head = Name.slice(Start, Next);
+  unsigned RegNum = matchVectorRegName(Head);
+  if (RegNum) {
+    if (Next != StringRef::npos) {
+      Kind = Name.slice(Next, StringRef::npos);
+      if (!isValidVectorKind(Kind)) {
+        TokError("invalid vector kind qualifier");
+        return -1;
+      }
+    }
+    Parser.Lex(); // Eat the register token.
+    return RegNum;
+  }
+  return -1;
+}
+
+static int MatchSysCRName(StringRef Name) {
+  // Use the same layout as the tablegen'erated register name matcher. Ugly,
+  // but efficient.
+  switch (Name.size()) {
+  default:
+    break;
+  case 2:
+    if (Name[0] != 'c' && Name[0] != 'C')
+      return -1;
+    switch (Name[1]) {
+    default:
+      return -1;
+    case '0':
+      return 0;
+    case '1':
+      return 1;
+    case '2':
+      return 2;
+    case '3':
+      return 3;
+    case '4':
+      return 4;
+    case '5':
+      return 5;
+    case '6':
+      return 6;
+    case '7':
+      return 7;
+    case '8':
+      return 8;
+    case '9':
+      return 9;
+    }
+    break;
+  case 3:
+    if ((Name[0] != 'c' && Name[0] != 'C') || Name[1] != '1')
+      return -1;
+    switch (Name[2]) {
+    default:
+      return -1;
+    case '0':
+      return 10;
+    case '1':
+      return 11;
+    case '2':
+      return 12;
+    case '3':
+      return 13;
+    case '4':
+      return 14;
+    case '5':
+      return 15;
+    }
+    break;
+  }
+
+  llvm_unreachable("Unhandled SysCR operand string!");
+  return -1;
+}
+
+/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  if (Tok.isNot(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
+
+  int Num = MatchSysCRName(Tok.getString());
+  if (Num == -1)
+    return MatchOperand_NoMatch;
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARM64Operand::CreateSysCR(Num, S, getLoc(), getContext()));
+  return MatchOperand_Success;
+}
+
+/// tryParsePrefetch - Try to parse a prefetch operand.
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  // Either an identifier for named values or a 5-bit immediate.
+  if (Tok.is(AsmToken::Hash)) {
+    Parser.Lex(); // Eat hash token.
+    const MCExpr *ImmVal;
+    if (getParser().parseExpression(ImmVal))
+      return MatchOperand_ParseFail;
+
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+    if (!MCE) {
+      TokError("immediate value expected for prefetch operand");
+      return MatchOperand_ParseFail;
+    }
+    unsigned prfop = MCE->getValue();
+    if (prfop > 31) {
+      TokError("prefetch operand out of range, [0,31] expected");
+      return MatchOperand_ParseFail;
+    }
+
+    Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext()));
+    return MatchOperand_Success;
+  }
+
+  if (Tok.isNot(AsmToken::Identifier)) {
+    TokError("pre-fetch hint expected");
+    return MatchOperand_ParseFail;
+  }
+
+  unsigned prfop = StringSwitch<unsigned>(Tok.getString())
+                       .Case("pldl1keep", ARM64_AM::PLDL1KEEP)
+                       .Case("pldl1strm", ARM64_AM::PLDL1STRM)
+                       .Case("pldl2keep", ARM64_AM::PLDL2KEEP)
+                       .Case("pldl2strm", ARM64_AM::PLDL2STRM)
+                       .Case("pldl3keep", ARM64_AM::PLDL3KEEP)
+                       .Case("pldl3strm", ARM64_AM::PLDL3STRM)
+                       .Case("pstl1keep", ARM64_AM::PSTL1KEEP)
+                       .Case("pstl1strm", ARM64_AM::PSTL1STRM)
+                       .Case("pstl2keep", ARM64_AM::PSTL2KEEP)
+                       .Case("pstl2strm", ARM64_AM::PSTL2STRM)
+                       .Case("pstl3keep", ARM64_AM::PSTL3KEEP)
+                       .Case("pstl3strm", ARM64_AM::PSTL3STRM)
+                       .Default(0xff);
+  if (prfop == 0xff) {
+    TokError("pre-fetch hint expected");
+    return MatchOperand_ParseFail;
+  }
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext()));
+  return MatchOperand_Success;
+}
+
+/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
+/// instruction.
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  const MCExpr *Expr;
+  if (parseSymbolicImmVal(Expr))
+    return MatchOperand_ParseFail;
+
+  ARM64MCExpr::VariantKind ELFRefKind;
+  MCSymbolRefExpr::VariantKind DarwinRefKind;
+  const MCConstantExpr *Addend;
+  if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
+    Error(S, "modified label reference + constant expected");
+    return MatchOperand_ParseFail;
+  }
+
+  if (DarwinRefKind == MCSymbolRefExpr::VK_None &&
+      ELFRefKind == ARM64MCExpr::VK_INVALID) {
+    // No modifier was specified at all; this is the syntax for an ELF basic
+    // ADRP relocation (unfortunately).
+    Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_ABS_PAGE, getContext());
+  } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE ||
+              DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) &&
+             Addend != 0) {
+    Error(S, "gotpage label reference not allowed an addend");
+    return MatchOperand_ParseFail;
+  } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE &&
+             DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE &&
+             DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
+             ELFRefKind != ARM64MCExpr::VK_GOT_PAGE &&
+             ELFRefKind != ARM64MCExpr::VK_GOTTPREL_PAGE &&
+             ELFRefKind != ARM64MCExpr::VK_TLSDESC_PAGE) {
+    // The operand must be an @page or @gotpage qualified symbolref.
+    Error(S, "page or gotpage label reference expected");
+    return MatchOperand_ParseFail;
+  }
+
+  // We have a label reference possibly with addend. The addend is a raw value
+  // here. The linker will adjust it to only reference the page.
+  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+  Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
+
+  return MatchOperand_Success;
+}
+
+/// tryParseAdrLabel - Parse and validate a source label for the ADR
+/// instruction.
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  const MCExpr *Expr;
+  if (getParser().parseExpression(Expr))
+    return MatchOperand_ParseFail;
+
+  // The operand must be an un-qualified assembler local symbolref.
+  // FIXME: wrong for ELF.
+  if (const MCSymbolRefExpr *SRE = dyn_cast<const MCSymbolRefExpr>(Expr)) {
+    // FIXME: Should reference the MachineAsmInfo to get the private prefix.
+    bool isTemporary = SRE->getSymbol().getName().startswith("L");
+    if (!isTemporary || SRE->getKind() != MCSymbolRefExpr::VK_None) {
+      Error(S, "unqualified, assembler-local label name expected");
+      return MatchOperand_ParseFail;
+    }
+  }
+
+  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+  Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
+
+  return MatchOperand_Success;
+}
+
+/// tryParseFPImm - A floating point immediate expression operand.
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseFPImm(OperandVector &Operands) {
+  SMLoc S = getLoc();
+
+  if (Parser.getTok().isNot(AsmToken::Hash))
+    return MatchOperand_NoMatch;
+  Parser.Lex(); // Eat the '#'.
+
+  // Handle negation, as that still comes through as a separate token.
+  bool isNegative = false;
+  if (Parser.getTok().is(AsmToken::Minus)) {
+    isNegative = true;
+    Parser.Lex();
+  }
+  const AsmToken &Tok = Parser.getTok();
+  if (Tok.is(AsmToken::Real)) {
+    APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+    // If we had a '-' in front, toggle the sign bit.
+    IntVal ^= (uint64_t)isNegative << 63;
+    int Val = ARM64_AM::getFP64Imm(APInt(64, IntVal));
+    Parser.Lex(); // Eat the token.
+    // Check for out of range values. As an exception, we let Zero through,
+    // as we handle that special case in post-processing before matching in
+    // order to use the zero register for it.
+    if (Val == -1 && !RealVal.isZero()) {
+      TokError("floating point value out of range");
+      return MatchOperand_ParseFail;
+    }
+    Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext()));
+    return MatchOperand_Success;
+  }
+  if (Tok.is(AsmToken::Integer)) {
+    int64_t Val;
+    if (!isNegative && Tok.getString().startswith("0x")) {
+      Val = Tok.getIntVal();
+      if (Val > 255 || Val < 0) {
+        TokError("encoded floating point value out of range");
+        return MatchOperand_ParseFail;
+      }
+    } else {
+      APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+      uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+      // If we had a '-' in front, toggle the sign bit.
+      IntVal ^= (uint64_t)isNegative << 63;
+      Val = ARM64_AM::getFP64Imm(APInt(64, IntVal));
+    }
+    Parser.Lex(); // Eat the token.
+    Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext()));
+    return MatchOperand_Success;
+  }
+
+  TokError("invalid floating point immediate");
+  return MatchOperand_ParseFail;
+}
+
+/// parseCondCodeString - Parse a Condition Code string.
+unsigned ARM64AsmParser::parseCondCodeString(StringRef Cond) {
+  unsigned CC = StringSwitch<unsigned>(Cond)
+                    .Case("eq", ARM64CC::EQ)
+                    .Case("ne", ARM64CC::NE)
+                    .Case("cs", ARM64CC::CS)
+                    .Case("hs", ARM64CC::CS)
+                    .Case("cc", ARM64CC::CC)
+                    .Case("lo", ARM64CC::CC)
+                    .Case("mi", ARM64CC::MI)
+                    .Case("pl", ARM64CC::PL)
+                    .Case("vs", ARM64CC::VS)
+                    .Case("vc", ARM64CC::VC)
+                    .Case("hi", ARM64CC::HI)
+                    .Case("ls", ARM64CC::LS)
+                    .Case("ge", ARM64CC::GE)
+                    .Case("lt", ARM64CC::LT)
+                    .Case("gt", ARM64CC::GT)
+                    .Case("le", ARM64CC::LE)
+                    .Case("al", ARM64CC::AL)
+                // Upper case works too. Not mixed case, though.
+                    .Case("EQ", ARM64CC::EQ)
+                    .Case("NE", ARM64CC::NE)
+                    .Case("CS", ARM64CC::CS)
+                    .Case("HS", ARM64CC::CS)
+                    .Case("CC", ARM64CC::CC)
+                    .Case("LO", ARM64CC::CC)
+                    .Case("MI", ARM64CC::MI)
+                    .Case("PL", ARM64CC::PL)
+                    .Case("VS", ARM64CC::VS)
+                    .Case("VC", ARM64CC::VC)
+                    .Case("HI", ARM64CC::HI)
+                    .Case("LS", ARM64CC::LS)
+                    .Case("GE", ARM64CC::GE)
+                    .Case("LT", ARM64CC::LT)
+                    .Case("GT", ARM64CC::GT)
+                    .Case("LE", ARM64CC::LE)
+                    .Case("AL", ARM64CC::AL)
+                    .Default(~0U);
+  return CC;
+}
+
+/// parseCondCode - Parse a Condition Code operand.
+bool ARM64AsmParser::parseCondCode(OperandVector &Operands,
+                                   bool invertCondCode) {
+  SMLoc S = getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  StringRef Cond = Tok.getString();
+  unsigned CC = parseCondCodeString(Cond);
+  if (CC == ~0U)
+    return TokError("invalid condition code");
+  Parser.Lex(); // Eat identifier token.
+
+  if (invertCondCode)
+    CC = ARM64CC::getInvertedCondCode(ARM64CC::CondCode(CC));
+
+  const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext());
+  Operands.push_back(
+      ARM64Operand::CreateImm(CCExpr, S, getLoc(), getContext()));
+  return false;
+}
+
+/// ParseOptionalShift - Some operands take an optional shift argument. Parse
+/// them if present.
+bool ARM64AsmParser::parseOptionalShift(OperandVector &Operands) {
+  const AsmToken &Tok = Parser.getTok();
+  ARM64_AM::ShiftType ShOp = StringSwitch<ARM64_AM::ShiftType>(Tok.getString())
+                                 .Case("lsl", ARM64_AM::LSL)
+                                 .Case("lsr", ARM64_AM::LSR)
+                                 .Case("asr", ARM64_AM::ASR)
+                                 .Case("ror", ARM64_AM::ROR)
+                                 .Case("msl", ARM64_AM::MSL)
+                                 .Case("LSL", ARM64_AM::LSL)
+                                 .Case("LSR", ARM64_AM::LSR)
+                                 .Case("ASR", ARM64_AM::ASR)
+                                 .Case("ROR", ARM64_AM::ROR)
+                                 .Case("MSL", ARM64_AM::MSL)
+                                 .Default(ARM64_AM::InvalidShift);
+  if (ShOp == ARM64_AM::InvalidShift)
+    return true;
+
+  SMLoc S = Tok.getLoc();
+  Parser.Lex();
+
+  // We expect a number here.
+  if (getLexer().isNot(AsmToken::Hash))
+    return TokError("immediate value expected for shifter operand");
+  Parser.Lex(); // Eat the '#'.
+
+  SMLoc ExprLoc = getLoc();
+  const MCExpr *ImmVal;
+  if (getParser().parseExpression(ImmVal))
+    return true;
+
+  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+  if (!MCE)
+    return TokError("immediate value expected for shifter operand");
+
+  if ((MCE->getValue() & 0x3f) != MCE->getValue())
+    return Error(ExprLoc, "immediate value too large for shifter operand");
+
+  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+  Operands.push_back(
+      ARM64Operand::CreateShifter(ShOp, MCE->getValue(), S, E, getContext()));
+  return false;
+}
+
+/// parseOptionalExtend - Some operands take an optional extend argument. Parse
+/// them if present.
+bool ARM64AsmParser::parseOptionalExtend(OperandVector &Operands) {
+  const AsmToken &Tok = Parser.getTok();
+  ARM64_AM::ExtendType ExtOp =
+      StringSwitch<ARM64_AM::ExtendType>(Tok.getString())
+          .Case("uxtb", ARM64_AM::UXTB)
+          .Case("uxth", ARM64_AM::UXTH)
+          .Case("uxtw", ARM64_AM::UXTW)
+          .Case("uxtx", ARM64_AM::UXTX)
+          .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX
+          .Case("sxtb", ARM64_AM::SXTB)
+          .Case("sxth", ARM64_AM::SXTH)
+          .Case("sxtw", ARM64_AM::SXTW)
+          .Case("sxtx", ARM64_AM::SXTX)
+          .Case("UXTB", ARM64_AM::UXTB)
+          .Case("UXTH", ARM64_AM::UXTH)
+          .Case("UXTW", ARM64_AM::UXTW)
+          .Case("UXTX", ARM64_AM::UXTX)
+          .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX
+          .Case("SXTB", ARM64_AM::SXTB)
+          .Case("SXTH", ARM64_AM::SXTH)
+          .Case("SXTW", ARM64_AM::SXTW)
+          .Case("SXTX", ARM64_AM::SXTX)
+          .Default(ARM64_AM::InvalidExtend);
+  if (ExtOp == ARM64_AM::InvalidExtend)
+    return true;
+
+  SMLoc S = Tok.getLoc();
+  Parser.Lex();
+
+  if (getLexer().is(AsmToken::EndOfStatement) ||
+      getLexer().is(AsmToken::Comma)) {
+    SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+    Operands.push_back(
+        ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext()));
+    return false;
+  }
+
+  if (getLexer().isNot(AsmToken::Hash)) {
+    SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+    Operands.push_back(
+        ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext()));
+    return false;
+  }
+
+  Parser.Lex(); // Eat the '#'.
+
+  const MCExpr *ImmVal;
+  if (getParser().parseExpression(ImmVal))
+    return true;
+
+  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+  if (!MCE)
+    return TokError("immediate value expected for extend operand");
+
+  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+  Operands.push_back(
+      ARM64Operand::CreateExtend(ExtOp, MCE->getValue(), S, E, getContext()));
+  return false;
+}
+
+/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
+/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
+bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
+                                   OperandVector &Operands) {
+  if (Name.find('.') != StringRef::npos)
+    return TokError("invalid operand");
+
+  Mnemonic = Name;
+  Operands.push_back(
+      ARM64Operand::CreateToken("sys", false, NameLoc, getContext()));
+
+  const AsmToken &Tok = Parser.getTok();
+  StringRef Op = Tok.getString();
+  SMLoc S = Tok.getLoc();
+
+  const MCExpr *Expr = 0;
+
+#define SYS_ALIAS(op1, Cn, Cm, op2)                                            \
+  do {                                                                         \
+    Expr = MCConstantExpr::Create(op1, getContext());                          \
+    Operands.push_back(                                                        \
+        ARM64Operand::CreateImm(Expr, S, getLoc(), getContext()));             \
+    Operands.push_back(                                                        \
+        ARM64Operand::CreateSysCR(Cn, S, getLoc(), getContext()));             \
+    Operands.push_back(                                                        \
+        ARM64Operand::CreateSysCR(Cm, S, getLoc(), getContext()));             \
+    Expr = MCConstantExpr::Create(op2, getContext());                          \
+    Operands.push_back(                                                        \
+        ARM64Operand::CreateImm(Expr, S, getLoc(), getContext()));             \
+  } while (0)
+
+  if (Mnemonic == "ic") {
+    if (!Op.compare_lower("ialluis")) {
+      // SYS #0, C7, C1, #0
+      SYS_ALIAS(0, 7, 1, 0);
+    } else if (!Op.compare_lower("iallu")) {
+      // SYS #0, C7, C5, #0
+      SYS_ALIAS(0, 7, 5, 0);
+    } else if (!Op.compare_lower("ivau")) {
+      // SYS #3, C7, C5, #1
+      SYS_ALIAS(3, 7, 5, 1);
+    } else {
+      return TokError("invalid operand for IC instruction");
+    }
+  } else if (Mnemonic == "dc") {
+    if (!Op.compare_lower("zva")) {
+      // SYS #3, C7, C4, #1
+      SYS_ALIAS(3, 7, 4, 1);
+    } else if (!Op.compare_lower("ivac")) {
+      // SYS #3, C7, C6, #1
+      SYS_ALIAS(0, 7, 6, 1);
+    } else if (!Op.compare_lower("isw")) {
+      // SYS #0, C7, C6, #2
+      SYS_ALIAS(0, 7, 6, 2);
+    } else if (!Op.compare_lower("cvac")) {
+      // SYS #3, C7, C10, #1
+      SYS_ALIAS(3, 7, 10, 1);
+    } else if (!Op.compare_lower("csw")) {
+      // SYS #0, C7, C10, #2
+      SYS_ALIAS(0, 7, 10, 2);
+    } else if (!Op.compare_lower("cvau")) {
+      // SYS #3, C7, C11, #1
+      SYS_ALIAS(3, 7, 11, 1);
+    } else if (!Op.compare_lower("civac")) {
+      // SYS #3, C7, C14, #1
+      SYS_ALIAS(3, 7, 14, 1);
+    } else if (!Op.compare_lower("cisw")) {
+      // SYS #0, C7, C14, #2
+      SYS_ALIAS(0, 7, 14, 2);
+    } else {
+      return TokError("invalid operand for DC instruction");
+    }
+  } else if (Mnemonic == "at") {
+    if (!Op.compare_lower("s1e1r")) {
+      // SYS #0, C7, C8, #0
+      SYS_ALIAS(0, 7, 8, 0);
+    } else if (!Op.compare_lower("s1e2r")) {
+      // SYS #4, C7, C8, #0
+      SYS_ALIAS(4, 7, 8, 0);
+    } else if (!Op.compare_lower("s1e3r")) {
+      // SYS #6, C7, C8, #0
+      SYS_ALIAS(6, 7, 8, 0);
+    } else if (!Op.compare_lower("s1e1w")) {
+      // SYS #0, C7, C8, #1
+      SYS_ALIAS(0, 7, 8, 1);
+    } else if (!Op.compare_lower("s1e2w")) {
+      // SYS #4, C7, C8, #1
+      SYS_ALIAS(4, 7, 8, 1);
+    } else if (!Op.compare_lower("s1e3w")) {
+      // SYS #6, C7, C8, #1
+      SYS_ALIAS(6, 7, 8, 1);
+    } else if (!Op.compare_lower("s1e0r")) {
+      // SYS #0, C7, C8, #3
+      SYS_ALIAS(0, 7, 8, 2);
+    } else if (!Op.compare_lower("s1e0w")) {
+      // SYS #0, C7, C8, #3
+      SYS_ALIAS(0, 7, 8, 3);
+    } else if (!Op.compare_lower("s12e1r")) {
+      // SYS #4, C7, C8, #4
+      SYS_ALIAS(4, 7, 8, 4);
+    } else if (!Op.compare_lower("s12e1w")) {
+      // SYS #4, C7, C8, #5
+      SYS_ALIAS(4, 7, 8, 5);
+    } else if (!Op.compare_lower("s12e0r")) {
+      // SYS #4, C7, C8, #6
+      SYS_ALIAS(4, 7, 8, 6);
+    } else if (!Op.compare_lower("s12e0w")) {
+      // SYS #4, C7, C8, #7
+      SYS_ALIAS(4, 7, 8, 7);
+    } else {
+      return TokError("invalid operand for AT instruction");
+    }
+  } else if (Mnemonic == "tlbi") {
+    if (!Op.compare_lower("vmalle1is")) {
+      // SYS #0, C8, C3, #0
+      SYS_ALIAS(0, 8, 3, 0);
+    } else if (!Op.compare_lower("alle2is")) {
+      // SYS #4, C8, C3, #0
+      SYS_ALIAS(4, 8, 3, 0);
+    } else if (!Op.compare_lower("alle3is")) {
+      // SYS #6, C8, C3, #0
+      SYS_ALIAS(6, 8, 3, 0);
+    } else if (!Op.compare_lower("vae1is")) {
+      // SYS #0, C8, C3, #1
+      SYS_ALIAS(0, 8, 3, 1);
+    } else if (!Op.compare_lower("vae2is")) {
+      // SYS #4, C8, C3, #1
+      SYS_ALIAS(4, 8, 3, 1);
+    } else if (!Op.compare_lower("vae3is")) {
+      // SYS #6, C8, C3, #1
+      SYS_ALIAS(6, 8, 3, 1);
+    } else if (!Op.compare_lower("aside1is")) {
+      // SYS #0, C8, C3, #2
+      SYS_ALIAS(0, 8, 3, 2);
+    } else if (!Op.compare_lower("vaae1is")) {
+      // SYS #0, C8, C3, #3
+      SYS_ALIAS(0, 8, 3, 3);
+    } else if (!Op.compare_lower("alle1is")) {
+      // SYS #4, C8, C3, #4
+      SYS_ALIAS(4, 8, 3, 4);
+    } else if (!Op.compare_lower("vale1is")) {
+      // SYS #0, C8, C3, #5
+      SYS_ALIAS(0, 8, 3, 5);
+    } else if (!Op.compare_lower("vaale1is")) {
+      // SYS #0, C8, C3, #7
+      SYS_ALIAS(0, 8, 3, 7);
+    } else if (!Op.compare_lower("vmalle1")) {
+      // SYS #0, C8, C7, #0
+      SYS_ALIAS(0, 8, 7, 0);
+    } else if (!Op.compare_lower("alle2")) {
+      // SYS #4, C8, C7, #0
+      SYS_ALIAS(4, 8, 7, 0);
+    } else if (!Op.compare_lower("vale2is")) {
+      // SYS #4, C8, C3, #5
+      SYS_ALIAS(4, 8, 3, 5);
+    } else if (!Op.compare_lower("vale3is")) {
+      // SYS #6, C8, C3, #5
+      SYS_ALIAS(6, 8, 3, 5);
+    } else if (!Op.compare_lower("alle3")) {
+      // SYS #6, C8, C7, #0
+      SYS_ALIAS(6, 8, 7, 0);
+    } else if (!Op.compare_lower("vae1")) {
+      // SYS #0, C8, C7, #1
+      SYS_ALIAS(0, 8, 7, 1);
+    } else if (!Op.compare_lower("vae2")) {
+      // SYS #4, C8, C7, #1
+      SYS_ALIAS(4, 8, 7, 1);
+    } else if (!Op.compare_lower("vae3")) {
+      // SYS #6, C8, C7, #1
+      SYS_ALIAS(6, 8, 7, 1);
+    } else if (!Op.compare_lower("aside1")) {
+      // SYS #0, C8, C7, #2
+      SYS_ALIAS(0, 8, 7, 2);
+    } else if (!Op.compare_lower("vaae1")) {
+      // SYS #0, C8, C7, #3
+      SYS_ALIAS(0, 8, 7, 3);
+    } else if (!Op.compare_lower("alle1")) {
+      // SYS #4, C8, C7, #4
+      SYS_ALIAS(4, 8, 7, 4);
+    } else if (!Op.compare_lower("vale1")) {
+      // SYS #0, C8, C7, #5
+      SYS_ALIAS(0, 8, 7, 5);
+    } else if (!Op.compare_lower("vale2")) {
+      // SYS #4, C8, C7, #5
+      SYS_ALIAS(4, 8, 7, 5);
+    } else if (!Op.compare_lower("vale3")) {
+      // SYS #6, C8, C7, #5
+      SYS_ALIAS(6, 8, 7, 5);
+    } else if (!Op.compare_lower("vaale1")) {
+      // SYS #0, C8, C7, #7
+      SYS_ALIAS(0, 8, 7, 7);
+    } else if (!Op.compare_lower("ipas2e1")) {
+      // SYS #4, C8, C4, #1
+      SYS_ALIAS(4, 8, 4, 1);
+    } else if (!Op.compare_lower("ipas2le1")) {
+      // SYS #4, C8, C4, #5
+      SYS_ALIAS(4, 8, 4, 5);
+    } else if (!Op.compare_lower("vmalls12e1")) {
+      // SYS #4, C8, C7, #6
+      SYS_ALIAS(4, 8, 7, 6);
+    } else if (!Op.compare_lower("vmalls12e1is")) {
+      // SYS #4, C8, C3, #6
+      SYS_ALIAS(4, 8, 3, 6);
+    } else {
+      return TokError("invalid operand for TLBI instruction");
+    }
+  }
+
+#undef SYS_ALIAS
+
+  Parser.Lex(); // Eat operand.
+
+  // Check for the optional register operand.
+  if (getLexer().is(AsmToken::Comma)) {
+    Parser.Lex(); // Eat comma.
+
+    if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands))
+      return TokError("expected register operand");
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    Parser.eatToEndOfStatement();
+    return TokError("unexpected token in argument list");
+  }
+
+  Parser.Lex(); // Consume the EndOfStatement
+  return false;
+}
+
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
+  const AsmToken &Tok = Parser.getTok();
+
+  // Can be either a #imm style literal or an option name
+  if (Tok.is(AsmToken::Hash)) {
+    // Immediate operand.
+    Parser.Lex(); // Eat the '#'
+    const MCExpr *ImmVal;
+    SMLoc ExprLoc = getLoc();
+    if (getParser().parseExpression(ImmVal))
+      return MatchOperand_ParseFail;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+    if (!MCE) {
+      Error(ExprLoc, "immediate value expected for barrier operand");
+      return MatchOperand_ParseFail;
+    }
+    if (MCE->getValue() < 0 || MCE->getValue() > 15) {
+      Error(ExprLoc, "barrier operand out of range");
+      return MatchOperand_ParseFail;
+    }
+    Operands.push_back(
+        ARM64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
+    return MatchOperand_Success;
+  }
+
+  if (Tok.isNot(AsmToken::Identifier)) {
+    TokError("invalid operand for instruction");
+    return MatchOperand_ParseFail;
+  }
+
+  unsigned Opt = StringSwitch<unsigned>(Tok.getString())
+                     .Case("oshld", ARM64SYS::OSHLD)
+                     .Case("oshst", ARM64SYS::OSHST)
+                     .Case("osh", ARM64SYS::OSH)
+                     .Case("nshld", ARM64SYS::NSHLD)
+                     .Case("nshst", ARM64SYS::NSHST)
+                     .Case("nsh", ARM64SYS::NSH)
+                     .Case("ishld", ARM64SYS::ISHLD)
+                     .Case("ishst", ARM64SYS::ISHST)
+                     .Case("ish", ARM64SYS::ISH)
+                     .Case("ld", ARM64SYS::LD)
+                     .Case("st", ARM64SYS::ST)
+                     .Case("sy", ARM64SYS::SY)
+                     .Default(ARM64SYS::InvalidBarrier);
+  if (Opt == ARM64SYS::InvalidBarrier) {
+    TokError("invalid barrier option name");
+    return MatchOperand_ParseFail;
+  }
+
+  // The only valid named option for ISB is 'sy'
+  if (Mnemonic == "isb" && Opt != ARM64SYS::SY) {
+    TokError("'sy' or #imm operand expected");
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(ARM64Operand::CreateBarrier(Opt, getLoc(), getContext()));
+  Parser.Lex(); // Consume the option
+
+  return MatchOperand_Success;
+}
+
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseSystemRegister(OperandVector &Operands) {
+  const AsmToken &Tok = Parser.getTok();
+
+  // It can be specified as a symbolic name.
+  if (Tok.isNot(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
+
+  StringRef ID = Tok.getString().lower();
+  ARM64SYS::SystemRegister Reg =
+      StringSwitch<ARM64SYS::SystemRegister>(ID)
+          .Case("spsr_el1", ARM64SYS::SPSR_svc)
+          .Case("spsr_svc", ARM64SYS::SPSR_svc)
+          .Case("elr_el1", ARM64SYS::ELR_EL1)
+          .Case("sp_el0", ARM64SYS::SP_EL0)
+          .Case("spsel", ARM64SYS::SPSel)
+          .Case("daif", ARM64SYS::DAIF)
+          .Case("currentel", ARM64SYS::CurrentEL)
+          .Case("nzcv", ARM64SYS::NZCV)
+          .Case("fpcr", ARM64SYS::FPCR)
+          .Case("fpsr", ARM64SYS::FPSR)
+          .Case("dspsr", ARM64SYS::DSPSR)
+          .Case("dlr", ARM64SYS::DLR)
+          .Case("spsr_el2", ARM64SYS::SPSR_hyp)
+          .Case("spsr_hyp", ARM64SYS::SPSR_hyp)
+          .Case("elr_el2", ARM64SYS::ELR_EL2)
+          .Case("sp_el1", ARM64SYS::SP_EL1)
+          .Case("spsr_irq", ARM64SYS::SPSR_irq)
+          .Case("spsr_abt", ARM64SYS::SPSR_abt)
+          .Case("spsr_und", ARM64SYS::SPSR_und)
+          .Case("spsr_fiq", ARM64SYS::SPSR_fiq)
+          .Case("spsr_el3", ARM64SYS::SPSR_EL3)
+          .Case("elr_el3", ARM64SYS::ELR_EL3)
+          .Case("sp_el2", ARM64SYS::SP_EL2)
+          .Case("midr_el1", ARM64SYS::MIDR_EL1)
+          .Case("ctr_el0", ARM64SYS::CTR_EL0)
+          .Case("mpidr_el1", ARM64SYS::MPIDR_EL1)
+          .Case("ecoidr_el1", ARM64SYS::ECOIDR_EL1)
+          .Case("dczid_el0", ARM64SYS::DCZID_EL0)
+          .Case("mvfr0_el1", ARM64SYS::MVFR0_EL1)
+          .Case("mvfr1_el1", ARM64SYS::MVFR1_EL1)
+          .Case("id_aa64pfr0_el1", ARM64SYS::ID_AA64PFR0_EL1)
+          .Case("id_aa64pfr1_el1", ARM64SYS::ID_AA64PFR1_EL1)
+          .Case("id_aa64dfr0_el1", ARM64SYS::ID_AA64DFR0_EL1)
+          .Case("id_aa64dfr1_el1", ARM64SYS::ID_AA64DFR1_EL1)
+          .Case("id_aa64isar0_el1", ARM64SYS::ID_AA64ISAR0_EL1)
+          .Case("id_aa64isar1_el1", ARM64SYS::ID_AA64ISAR1_EL1)
+          .Case("id_aa64mmfr0_el1", ARM64SYS::ID_AA64MMFR0_EL1)
+          .Case("id_aa64mmfr1_el1", ARM64SYS::ID_AA64MMFR1_EL1)
+          .Case("ccsidr_el1", ARM64SYS::CCSIDR_EL1)
+          .Case("clidr_el1", ARM64SYS::CLIDR_EL1)
+          .Case("aidr_el1", ARM64SYS::AIDR_EL1)
+          .Case("csselr_el1", ARM64SYS::CSSELR_EL1)
+          .Case("vpidr_el2", ARM64SYS::VPIDR_EL2)
+          .Case("vmpidr_el2", ARM64SYS::VMPIDR_EL2)
+          .Case("sctlr_el1", ARM64SYS::SCTLR_EL1)
+          .Case("sctlr_el2", ARM64SYS::SCTLR_EL2)
+          .Case("sctlr_el3", ARM64SYS::SCTLR_EL3)
+          .Case("actlr_el1", ARM64SYS::ACTLR_EL1)
+          .Case("actlr_el2", ARM64SYS::ACTLR_EL2)
+          .Case("actlr_el3", ARM64SYS::ACTLR_EL3)
+          .Case("cpacr_el1", ARM64SYS::CPACR_EL1)
+          .Case("cptr_el2", ARM64SYS::CPTR_EL2)
+          .Case("cptr_el3", ARM64SYS::CPTR_EL3)
+          .Case("scr_el3", ARM64SYS::SCR_EL3)
+          .Case("hcr_el2", ARM64SYS::HCR_EL2)
+          .Case("mdcr_el2", ARM64SYS::MDCR_EL2)
+          .Case("mdcr_el3", ARM64SYS::MDCR_EL3)
+          .Case("hstr_el2", ARM64SYS::HSTR_EL2)
+          .Case("hacr_el2", ARM64SYS::HACR_EL2)
+          .Case("ttbr0_el1", ARM64SYS::TTBR0_EL1)
+          .Case("ttbr1_el1", ARM64SYS::TTBR1_EL1)
+          .Case("ttbr0_el2", ARM64SYS::TTBR0_EL2)
+          .Case("ttbr0_el3", ARM64SYS::TTBR0_EL3)
+          .Case("vttbr_el2", ARM64SYS::VTTBR_EL2)
+          .Case("tcr_el1", ARM64SYS::TCR_EL1)
+          .Case("tcr_el2", ARM64SYS::TCR_EL2)
+          .Case("tcr_el3", ARM64SYS::TCR_EL3)
+          .Case("vtcr_el2", ARM64SYS::VTCR_EL2)
+          .Case("adfsr_el1", ARM64SYS::ADFSR_EL1)
+          .Case("aifsr_el1", ARM64SYS::AIFSR_EL1)
+          .Case("adfsr_el2", ARM64SYS::ADFSR_EL2)
+          .Case("aifsr_el2", ARM64SYS::AIFSR_EL2)
+          .Case("adfsr_el3", ARM64SYS::ADFSR_EL3)
+          .Case("aifsr_el3", ARM64SYS::AIFSR_EL3)
+          .Case("esr_el1", ARM64SYS::ESR_EL1)
+          .Case("esr_el2", ARM64SYS::ESR_EL2)
+          .Case("esr_el3", ARM64SYS::ESR_EL3)
+          .Case("far_el1", ARM64SYS::FAR_EL1)
+          .Case("far_el2", ARM64SYS::FAR_EL2)
+          .Case("far_el3", ARM64SYS::FAR_EL3)
+          .Case("hpfar_el2", ARM64SYS::HPFAR_EL2)
+          .Case("par_el1", ARM64SYS::PAR_EL1)
+          .Case("mair_el1", ARM64SYS::MAIR_EL1)
+          .Case("mair_el2", ARM64SYS::MAIR_EL2)
+          .Case("mair_el3", ARM64SYS::MAIR_EL3)
+          .Case("amair_el1", ARM64SYS::AMAIR_EL1)
+          .Case("amair_el2", ARM64SYS::AMAIR_EL2)
+          .Case("amair_el3", ARM64SYS::AMAIR_EL3)
+          .Case("vbar_el1", ARM64SYS::VBAR_EL1)
+          .Case("vbar_el2", ARM64SYS::VBAR_EL2)
+          .Case("vbar_el3", ARM64SYS::VBAR_EL3)
+          .Case("rvbar_el1", ARM64SYS::RVBAR_EL1)
+          .Case("rvbar_el2", ARM64SYS::RVBAR_EL2)
+          .Case("rvbar_el3", ARM64SYS::RVBAR_EL3)
+          .Case("isr_el1", ARM64SYS::ISR_EL1)
+          .Case("contextidr_el1", ARM64SYS::CONTEXTIDR_EL1)
+          .Case("tpidr_el0", ARM64SYS::TPIDR_EL0)
+          .Case("tpidrro_el0", ARM64SYS::TPIDRRO_EL0)
+          .Case("tpidr_el1", ARM64SYS::TPIDR_EL1)
+          .Case("tpidr_el2", ARM64SYS::TPIDR_EL2)
+          .Case("tpidr_el3", ARM64SYS::TPIDR_EL3)
+          .Case("teecr32_el1", ARM64SYS::TEECR32_EL1)
+          .Case("cntfrq_el0", ARM64SYS::CNTFRQ_EL0)
+          .Case("cntpct_el0", ARM64SYS::CNTPCT_EL0)
+          .Case("cntvct_el0", ARM64SYS::CNTVCT_EL0)
+          .Case("cntvoff_el2", ARM64SYS::CNTVOFF_EL2)
+          .Case("cntkctl_el1", ARM64SYS::CNTKCTL_EL1)
+          .Case("cnthctl_el2", ARM64SYS::CNTHCTL_EL2)
+          .Case("cntp_tval_el0", ARM64SYS::CNTP_TVAL_EL0)
+          .Case("cntp_ctl_el0", ARM64SYS::CNTP_CTL_EL0)
+          .Case("cntp_cval_el0", ARM64SYS::CNTP_CVAL_EL0)
+          .Case("cntv_tval_el0", ARM64SYS::CNTV_TVAL_EL0)
+          .Case("cntv_ctl_el0", ARM64SYS::CNTV_CTL_EL0)
+          .Case("cntv_cval_el0", ARM64SYS::CNTV_CVAL_EL0)
+          .Case("cnthp_tval_el2", ARM64SYS::CNTHP_TVAL_EL2)
+          .Case("cnthp_ctl_el2", ARM64SYS::CNTHP_CTL_EL2)
+          .Case("cnthp_cval_el2", ARM64SYS::CNTHP_CVAL_EL2)
+          .Case("cntps_tval_el1", ARM64SYS::CNTPS_TVAL_EL1)
+          .Case("cntps_ctl_el1", ARM64SYS::CNTPS_CTL_EL1)
+          .Case("cntps_cval_el1", ARM64SYS::CNTPS_CVAL_EL1)
+          .Case("dacr32_el2", ARM64SYS::DACR32_EL2)
+          .Case("ifsr32_el2", ARM64SYS::IFSR32_EL2)
+          .Case("teehbr32_el1", ARM64SYS::TEEHBR32_EL1)
+          .Case("sder32_el3", ARM64SYS::SDER32_EL3)
+          .Case("fpexc32_el2", ARM64SYS::FPEXC32_EL2)
+          .Case("current_el", ARM64SYS::CurrentEL)
+          .Case("pmevcntr0_el0", ARM64SYS::PMEVCNTR0_EL0)
+          .Case("pmevcntr1_el0", ARM64SYS::PMEVCNTR1_EL0)
+          .Case("pmevcntr2_el0", ARM64SYS::PMEVCNTR2_EL0)
+          .Case("pmevcntr3_el0", ARM64SYS::PMEVCNTR3_EL0)
+          .Case("pmevcntr4_el0", ARM64SYS::PMEVCNTR4_EL0)
+          .Case("pmevcntr5_el0", ARM64SYS::PMEVCNTR5_EL0)
+          .Case("pmevcntr6_el0", ARM64SYS::PMEVCNTR6_EL0)
+          .Case("pmevcntr7_el0", ARM64SYS::PMEVCNTR7_EL0)
+          .Case("pmevcntr8_el0", ARM64SYS::PMEVCNTR8_EL0)
+          .Case("pmevcntr9_el0", ARM64SYS::PMEVCNTR9_EL0)
+          .Case("pmevcntr10_el0", ARM64SYS::PMEVCNTR10_EL0)
+          .Case("pmevcntr11_el0", ARM64SYS::PMEVCNTR11_EL0)
+          .Case("pmevcntr12_el0", ARM64SYS::PMEVCNTR12_EL0)
+          .Case("pmevcntr13_el0", ARM64SYS::PMEVCNTR13_EL0)
+          .Case("pmevcntr14_el0", ARM64SYS::PMEVCNTR14_EL0)
+          .Case("pmevcntr15_el0", ARM64SYS::PMEVCNTR15_EL0)
+          .Case("pmevcntr16_el0", ARM64SYS::PMEVCNTR16_EL0)
+          .Case("pmevcntr17_el0", ARM64SYS::PMEVCNTR17_EL0)
+          .Case("pmevcntr18_el0", ARM64SYS::PMEVCNTR18_EL0)
+          .Case("pmevcntr19_el0", ARM64SYS::PMEVCNTR19_EL0)
+          .Case("pmevcntr20_el0", ARM64SYS::PMEVCNTR20_EL0)
+          .Case("pmevcntr21_el0", ARM64SYS::PMEVCNTR21_EL0)
+          .Case("pmevcntr22_el0", ARM64SYS::PMEVCNTR22_EL0)
+          .Case("pmevcntr23_el0", ARM64SYS::PMEVCNTR23_EL0)
+          .Case("pmevcntr24_el0", ARM64SYS::PMEVCNTR24_EL0)
+          .Case("pmevcntr25_el0", ARM64SYS::PMEVCNTR25_EL0)
+          .Case("pmevcntr26_el0", ARM64SYS::PMEVCNTR26_EL0)
+          .Case("pmevcntr27_el0", ARM64SYS::PMEVCNTR27_EL0)
+          .Case("pmevcntr28_el0", ARM64SYS::PMEVCNTR28_EL0)
+          .Case("pmevcntr29_el0", ARM64SYS::PMEVCNTR29_EL0)
+          .Case("pmevcntr30_el0", ARM64SYS::PMEVCNTR30_EL0)
+          .Case("pmevtyper0_el0", ARM64SYS::PMEVTYPER0_EL0)
+          .Case("pmevtyper1_el0", ARM64SYS::PMEVTYPER1_EL0)
+          .Case("pmevtyper2_el0", ARM64SYS::PMEVTYPER2_EL0)
+          .Case("pmevtyper3_el0", ARM64SYS::PMEVTYPER3_EL0)
+          .Case("pmevtyper4_el0", ARM64SYS::PMEVTYPER4_EL0)
+          .Case("pmevtyper5_el0", ARM64SYS::PMEVTYPER5_EL0)
+          .Case("pmevtyper6_el0", ARM64SYS::PMEVTYPER6_EL0)
+          .Case("pmevtyper7_el0", ARM64SYS::PMEVTYPER7_EL0)
+          .Case("pmevtyper8_el0", ARM64SYS::PMEVTYPER8_EL0)
+          .Case("pmevtyper9_el0", ARM64SYS::PMEVTYPER9_EL0)
+          .Case("pmevtyper10_el0", ARM64SYS::PMEVTYPER10_EL0)
+          .Case("pmevtyper11_el0", ARM64SYS::PMEVTYPER11_EL0)
+          .Case("pmevtyper12_el0", ARM64SYS::PMEVTYPER12_EL0)
+          .Case("pmevtyper13_el0", ARM64SYS::PMEVTYPER13_EL0)
+          .Case("pmevtyper14_el0", ARM64SYS::PMEVTYPER14_EL0)
+          .Case("pmevtyper15_el0", ARM64SYS::PMEVTYPER15_EL0)
+          .Case("pmevtyper16_el0", ARM64SYS::PMEVTYPER16_EL0)
+          .Case("pmevtyper17_el0", ARM64SYS::PMEVTYPER17_EL0)
+          .Case("pmevtyper18_el0", ARM64SYS::PMEVTYPER18_EL0)
+          .Case("pmevtyper19_el0", ARM64SYS::PMEVTYPER19_EL0)
+          .Case("pmevtyper20_el0", ARM64SYS::PMEVTYPER20_EL0)
+          .Case("pmevtyper21_el0", ARM64SYS::PMEVTYPER21_EL0)
+          .Case("pmevtyper22_el0", ARM64SYS::PMEVTYPER22_EL0)
+          .Case("pmevtyper23_el0", ARM64SYS::PMEVTYPER23_EL0)
+          .Case("pmevtyper24_el0", ARM64SYS::PMEVTYPER24_EL0)
+          .Case("pmevtyper25_el0", ARM64SYS::PMEVTYPER25_EL0)
+          .Case("pmevtyper26_el0", ARM64SYS::PMEVTYPER26_EL0)
+          .Case("pmevtyper27_el0", ARM64SYS::PMEVTYPER27_EL0)
+          .Case("pmevtyper28_el0", ARM64SYS::PMEVTYPER28_EL0)
+          .Case("pmevtyper29_el0", ARM64SYS::PMEVTYPER29_EL0)
+          .Case("pmevtyper30_el0", ARM64SYS::PMEVTYPER30_EL0)
+          .Case("pmccfiltr_el0", ARM64SYS::PMCCFILTR_EL0)
+          .Case("rmr_el3", ARM64SYS::RMR_EL3)
+          .Case("rmr_el2", ARM64SYS::RMR_EL2)
+          .Case("rmr_el1", ARM64SYS::RMR_EL1)
+          .Case("cpm_ioacc_ctl_el3", ARM64SYS::CPM_IOACC_CTL_EL3)
+          .Case("mdccsr_el0", ARM64SYS::MDCCSR_EL0)
+          .Case("mdccint_el1", ARM64SYS::MDCCINT_EL1)
+          .Case("dbgdtr_el0", ARM64SYS::DBGDTR_EL0)
+          .Case("dbgdtrrx_el0", ARM64SYS::DBGDTRRX_EL0)
+          .Case("dbgdtrtx_el0", ARM64SYS::DBGDTRTX_EL0)
+          .Case("dbgvcr32_el2", ARM64SYS::DBGVCR32_EL2)
+          .Case("osdtrrx_el1", ARM64SYS::OSDTRRX_EL1)
+          .Case("mdscr_el1", ARM64SYS::MDSCR_EL1)
+          .Case("osdtrtx_el1", ARM64SYS::OSDTRTX_EL1)
+          .Case("oseccr_el11", ARM64SYS::OSECCR_EL11)
+          .Case("dbgbvr0_el1", ARM64SYS::DBGBVR0_EL1)
+          .Case("dbgbvr1_el1", ARM64SYS::DBGBVR1_EL1)
+          .Case("dbgbvr2_el1", ARM64SYS::DBGBVR2_EL1)
+          .Case("dbgbvr3_el1", ARM64SYS::DBGBVR3_EL1)
+          .Case("dbgbvr4_el1", ARM64SYS::DBGBVR4_EL1)
+          .Case("dbgbvr5_el1", ARM64SYS::DBGBVR5_EL1)
+          .Case("dbgbvr6_el1", ARM64SYS::DBGBVR6_EL1)
+          .Case("dbgbvr7_el1", ARM64SYS::DBGBVR7_EL1)
+          .Case("dbgbvr8_el1", ARM64SYS::DBGBVR8_EL1)
+          .Case("dbgbvr9_el1", ARM64SYS::DBGBVR9_EL1)
+          .Case("dbgbvr10_el1", ARM64SYS::DBGBVR10_EL1)
+          .Case("dbgbvr11_el1", ARM64SYS::DBGBVR11_EL1)
+          .Case("dbgbvr12_el1", ARM64SYS::DBGBVR12_EL1)
+          .Case("dbgbvr13_el1", ARM64SYS::DBGBVR13_EL1)
+          .Case("dbgbvr14_el1", ARM64SYS::DBGBVR14_EL1)
+          .Case("dbgbvr15_el1", ARM64SYS::DBGBVR15_EL1)
+          .Case("dbgbcr0_el1", ARM64SYS::DBGBCR0_EL1)
+          .Case("dbgbcr1_el1", ARM64SYS::DBGBCR1_EL1)
+          .Case("dbgbcr2_el1", ARM64SYS::DBGBCR2_EL1)
+          .Case("dbgbcr3_el1", ARM64SYS::DBGBCR3_EL1)
+          .Case("dbgbcr4_el1", ARM64SYS::DBGBCR4_EL1)
+          .Case("dbgbcr5_el1", ARM64SYS::DBGBCR5_EL1)
+          .Case("dbgbcr6_el1", ARM64SYS::DBGBCR6_EL1)
+          .Case("dbgbcr7_el1", ARM64SYS::DBGBCR7_EL1)
+          .Case("dbgbcr8_el1", ARM64SYS::DBGBCR8_EL1)
+          .Case("dbgbcr9_el1", ARM64SYS::DBGBCR9_EL1)
+          .Case("dbgbcr10_el1", ARM64SYS::DBGBCR10_EL1)
+          .Case("dbgbcr11_el1", ARM64SYS::DBGBCR11_EL1)
+          .Case("dbgbcr12_el1", ARM64SYS::DBGBCR12_EL1)
+          .Case("dbgbcr13_el1", ARM64SYS::DBGBCR13_EL1)
+          .Case("dbgbcr14_el1", ARM64SYS::DBGBCR14_EL1)
+          .Case("dbgbcr15_el1", ARM64SYS::DBGBCR15_EL1)
+          .Case("dbgwvr0_el1", ARM64SYS::DBGWVR0_EL1)
+          .Case("dbgwvr1_el1", ARM64SYS::DBGWVR1_EL1)
+          .Case("dbgwvr2_el1", ARM64SYS::DBGWVR2_EL1)
+          .Case("dbgwvr3_el1", ARM64SYS::DBGWVR3_EL1)
+          .Case("dbgwvr4_el1", ARM64SYS::DBGWVR4_EL1)
+          .Case("dbgwvr5_el1", ARM64SYS::DBGWVR5_EL1)
+          .Case("dbgwvr6_el1", ARM64SYS::DBGWVR6_EL1)
+          .Case("dbgwvr7_el1", ARM64SYS::DBGWVR7_EL1)
+          .Case("dbgwvr8_el1", ARM64SYS::DBGWVR8_EL1)
+          .Case("dbgwvr9_el1", ARM64SYS::DBGWVR9_EL1)
+          .Case("dbgwvr10_el1", ARM64SYS::DBGWVR10_EL1)
+          .Case("dbgwvr11_el1", ARM64SYS::DBGWVR11_EL1)
+          .Case("dbgwvr12_el1", ARM64SYS::DBGWVR12_EL1)
+          .Case("dbgwvr13_el1", ARM64SYS::DBGWVR13_EL1)
+          .Case("dbgwvr14_el1", ARM64SYS::DBGWVR14_EL1)
+          .Case("dbgwvr15_el1", ARM64SYS::DBGWVR15_EL1)
+          .Case("dbgwcr0_el1", ARM64SYS::DBGWCR0_EL1)
+          .Case("dbgwcr1_el1", ARM64SYS::DBGWCR1_EL1)
+          .Case("dbgwcr2_el1", ARM64SYS::DBGWCR2_EL1)
+          .Case("dbgwcr3_el1", ARM64SYS::DBGWCR3_EL1)
+          .Case("dbgwcr4_el1", ARM64SYS::DBGWCR4_EL1)
+          .Case("dbgwcr5_el1", ARM64SYS::DBGWCR5_EL1)
+          .Case("dbgwcr6_el1", ARM64SYS::DBGWCR6_EL1)
+          .Case("dbgwcr7_el1", ARM64SYS::DBGWCR7_EL1)
+          .Case("dbgwcr8_el1", ARM64SYS::DBGWCR8_EL1)
+          .Case("dbgwcr9_el1", ARM64SYS::DBGWCR9_EL1)
+          .Case("dbgwcr10_el1", ARM64SYS::DBGWCR10_EL1)
+          .Case("dbgwcr11_el1", ARM64SYS::DBGWCR11_EL1)
+          .Case("dbgwcr12_el1", ARM64SYS::DBGWCR12_EL1)
+          .Case("dbgwcr13_el1", ARM64SYS::DBGWCR13_EL1)
+          .Case("dbgwcr14_el1", ARM64SYS::DBGWCR14_EL1)
+          .Case("dbgwcr15_el1", ARM64SYS::DBGWCR15_EL1)
+          .Case("mdrar_el1", ARM64SYS::MDRAR_EL1)
+          .Case("oslar_el1", ARM64SYS::OSLAR_EL1)
+          .Case("oslsr_el1", ARM64SYS::OSLSR_EL1)
+          .Case("osdlr_el1", ARM64SYS::OSDLR_EL1)
+          .Case("dbgprcr_el1", ARM64SYS::DBGPRCR_EL1)
+          .Case("dbgclaimset_el1", ARM64SYS::DBGCLAIMSET_EL1)
+          .Case("dbgclaimclr_el1", ARM64SYS::DBGCLAIMCLR_EL1)
+          .Case("dbgauthstatus_el1", ARM64SYS::DBGAUTHSTATUS_EL1)
+          .Case("dbgdevid2", ARM64SYS::DBGDEVID2)
+          .Case("dbgdevid1", ARM64SYS::DBGDEVID1)
+          .Case("dbgdevid0", ARM64SYS::DBGDEVID0)
+          .Case("id_pfr0_el1", ARM64SYS::ID_PFR0_EL1)
+          .Case("id_pfr1_el1", ARM64SYS::ID_PFR1_EL1)
+          .Case("id_dfr0_el1", ARM64SYS::ID_DFR0_EL1)
+          .Case("id_afr0_el1", ARM64SYS::ID_AFR0_EL1)
+          .Case("id_isar0_el1", ARM64SYS::ID_ISAR0_EL1)
+          .Case("id_isar1_el1", ARM64SYS::ID_ISAR1_EL1)
+          .Case("id_isar2_el1", ARM64SYS::ID_ISAR2_EL1)
+          .Case("id_isar3_el1", ARM64SYS::ID_ISAR3_EL1)
+          .Case("id_isar4_el1", ARM64SYS::ID_ISAR4_EL1)
+          .Case("id_isar5_el1", ARM64SYS::ID_ISAR5_EL1)
+          .Case("afsr1_el1", ARM64SYS::AFSR1_EL1)
+          .Case("afsr0_el1", ARM64SYS::AFSR0_EL1)
+          .Case("revidr_el1", ARM64SYS::REVIDR_EL1)
+          .Default(ARM64SYS::InvalidSystemReg);
+  if (Reg != ARM64SYS::InvalidSystemReg) {
+    // We matched a reg name, so create the operand.
+    Operands.push_back(
+        ARM64Operand::CreateSystemRegister(Reg, getLoc(), getContext()));
+    Parser.Lex(); // Consume the register name.
+    return MatchOperand_Success;
+  }
+
+  // Or we may have an identifier that encodes the sub-operands.
+  // For example, s3_2_c15_c0_0.
+  unsigned op0, op1, CRn, CRm, op2;
+  std::string Desc = ID;
+  if (std::sscanf(Desc.c_str(), "s%u_%u_c%u_c%u_%u", &op0, &op1, &CRn, &CRm,
+                  &op2) != 5)
+    return MatchOperand_NoMatch;
+  if ((op0 != 2 && op0 != 3) || op1 > 7 || CRn > 15 || CRm > 15 || op2 > 7)
+    return MatchOperand_NoMatch;
+
+  unsigned Val = op0 << 14 | op1 << 11 | CRn << 7 | CRm << 3 | op2;
+  Operands.push_back(
+      ARM64Operand::CreateSystemRegister(Val, getLoc(), getContext()));
+  Parser.Lex(); // Consume the register name.
+
+  return MatchOperand_Success;
+}
+
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseCPSRField(OperandVector &Operands) {
+  const AsmToken &Tok = Parser.getTok();
+
+  if (Tok.isNot(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
+
+  ARM64SYS::CPSRField Field =
+      StringSwitch<ARM64SYS::CPSRField>(Tok.getString().lower())
+          .Case("spsel", ARM64SYS::cpsr_SPSel)
+          .Case("daifset", ARM64SYS::cpsr_DAIFSet)
+          .Case("daifclr", ARM64SYS::cpsr_DAIFClr)
+          .Default(ARM64SYS::InvalidCPSRField);
+  if (Field == ARM64SYS::InvalidCPSRField)
+    return MatchOperand_NoMatch;
+  Operands.push_back(
+      ARM64Operand::CreateCPSRField(Field, getLoc(), getContext()));
+  Parser.Lex(); // Consume the register name.
+
+  return MatchOperand_Success;
+}
+
+/// tryParseVectorRegister - Parse a vector register operand.
+bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
+  if (Parser.getTok().isNot(AsmToken::Identifier))
+    return true;
+
+  SMLoc S = getLoc();
+  // Check for a vector register specifier first.
+  StringRef Kind;
+  int64_t Reg = tryMatchVectorRegister(Kind);
+  if (Reg == -1)
+    return true;
+  Operands.push_back(
+      ARM64Operand::CreateReg(Reg, true, S, getLoc(), getContext()));
+  // If there was an explicit qualifier, that goes on as a literal text
+  // operand.
+  if (!Kind.empty())
+    Operands.push_back(ARM64Operand::CreateToken(Kind, false, S, getContext()));
+
+  // If there is an index specifier following the register, parse that too.
+  if (Parser.getTok().is(AsmToken::LBrac)) {
+    SMLoc SIdx = getLoc();
+    Parser.Lex(); // Eat left bracket token.
+
+    const MCExpr *ImmVal;
+    if (getParser().parseExpression(ImmVal))
+      return MatchOperand_ParseFail;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+    if (!MCE) {
+      TokError("immediate value expected for vector index");
+      return MatchOperand_ParseFail;
+    }
+
+    SMLoc E = getLoc();
+    if (Parser.getTok().isNot(AsmToken::RBrac)) {
+      Error(E, "']' expected");
+      return MatchOperand_ParseFail;
+    }
+
+    Parser.Lex(); // Eat right bracket token.
+
+    Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E,
+                                                       getContext()));
+  }
+
+  return false;
+}
+
+/// parseRegister - Parse a non-vector register operand.
+bool ARM64AsmParser::parseRegister(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  // Try for a vector register.
+  if (!tryParseVectorRegister(Operands))
+    return false;
+
+  // Try for a scalar register.
+  int64_t Reg = tryParseRegister();
+  if (Reg == -1)
+    return true;
+  Operands.push_back(
+      ARM64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
+
+  // A small number of instructions (FMOVXDhighr, for example) have "[1]"
+  // as a string token in the instruction itself.
+  if (getLexer().getKind() == AsmToken::LBrac) {
+    SMLoc LBracS = getLoc();
+    Parser.Lex();
+    const AsmToken &Tok = Parser.getTok();
+    if (Tok.is(AsmToken::Integer)) {
+      SMLoc IntS = getLoc();
+      int64_t Val = Tok.getIntVal();
+      if (Val == 1) {
+        Parser.Lex();
+        if (getLexer().getKind() == AsmToken::RBrac) {
+          SMLoc RBracS = getLoc();
+          Parser.Lex();
+          Operands.push_back(
+              ARM64Operand::CreateToken("[", false, LBracS, getContext()));
+          Operands.push_back(
+              ARM64Operand::CreateToken("1", false, IntS, getContext()));
+          Operands.push_back(
+              ARM64Operand::CreateToken("]", false, RBracS, getContext()));
+          return false;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+/// tryParseNoIndexMemory - Custom parser method for memory operands that
+///                         do not allow base regisrer writeback modes,
+///                         or those that handle writeback separately from
+///                         the memory operand (like the AdvSIMD ldX/stX
+///                         instructions.
+ARM64AsmParser::OperandMatchResultTy
+ARM64AsmParser::tryParseNoIndexMemory(OperandVector &Operands) {
+  if (Parser.getTok().isNot(AsmToken::LBrac))
+    return MatchOperand_NoMatch;
+  SMLoc S = getLoc();
+  Parser.Lex(); // Eat left bracket token.
+
+  const AsmToken &BaseRegTok = Parser.getTok();
+  if (BaseRegTok.isNot(AsmToken::Identifier)) {
+    Error(BaseRegTok.getLoc(), "register expected");
+    return MatchOperand_ParseFail;
+  }
+
+  int64_t Reg = tryParseRegister();
+  if (Reg == -1) {
+    Error(BaseRegTok.getLoc(), "register expected");
+    return MatchOperand_ParseFail;
+  }
+
+  SMLoc E = getLoc();
+  if (Parser.getTok().isNot(AsmToken::RBrac)) {
+    Error(E, "']' expected");
+    return MatchOperand_ParseFail;
+  }
+
+  Parser.Lex(); // Eat right bracket token.
+
+  Operands.push_back(ARM64Operand::CreateMem(Reg, 0, S, E, E, getContext()));
+  return MatchOperand_Success;
+}
+
+/// parseMemory - Parse a memory operand for a basic load/store instruction.
+bool ARM64AsmParser::parseMemory(OperandVector &Operands) {
+  assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket");
+  SMLoc S = getLoc();
+  Parser.Lex(); // Eat left bracket token.
+
+  const AsmToken &BaseRegTok = Parser.getTok();
+  if (BaseRegTok.isNot(AsmToken::Identifier))
+    return Error(BaseRegTok.getLoc(), "register expected");
+
+  int64_t Reg = tryParseRegister();
+  if (Reg == -1)
+    return Error(BaseRegTok.getLoc(), "register expected");
+
+  // If there is an offset expression, parse it.
+  const MCExpr *OffsetExpr = 0;
+  SMLoc OffsetLoc;
+  if (Parser.getTok().is(AsmToken::Comma)) {
+    Parser.Lex(); // Eat the comma.
+    OffsetLoc = getLoc();
+
+    // Register offset
+    const AsmToken &OffsetRegTok = Parser.getTok();
+    int Reg2 = OffsetRegTok.is(AsmToken::Identifier) ? tryParseRegister() : -1;
+    if (Reg2 != -1) {
+      // Default shift is LSL, with an omitted shift.  We use the third bit of
+      // the extend value to indicate presence/omission of the immediate offset.
+      ARM64_AM::ExtendType ExtOp = ARM64_AM::UXTX;
+      int64_t ShiftVal = 0;
+      bool ExplicitShift = false;
+
+      if (Parser.getTok().is(AsmToken::Comma)) {
+        // Embedded extend operand.
+        Parser.Lex(); // Eat the comma
+
+        SMLoc ExtLoc = getLoc();
+        const AsmToken &Tok = Parser.getTok();
+        ExtOp = StringSwitch<ARM64_AM::ExtendType>(Tok.getString())
+                    .Case("uxtw", ARM64_AM::UXTW)
+                    .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX
+                    .Case("sxtw", ARM64_AM::SXTW)
+                    .Case("sxtx", ARM64_AM::SXTX)
+                    .Case("UXTW", ARM64_AM::UXTW)
+                    .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX
+                    .Case("SXTW", ARM64_AM::SXTW)
+                    .Case("SXTX", ARM64_AM::SXTX)
+                    .Default(ARM64_AM::InvalidExtend);
+        if (ExtOp == ARM64_AM::InvalidExtend)
+          return Error(ExtLoc, "expected valid extend operation");
+
+        Parser.Lex(); // Eat the extend op.
+
+        if (getLexer().is(AsmToken::RBrac)) {
+          // No immediate operand.
+          if (ExtOp == ARM64_AM::UXTX)
+            return Error(ExtLoc, "LSL extend requires immediate operand");
+        } else if (getLexer().is(AsmToken::Hash)) {
+          // Immediate operand.
+          Parser.Lex(); // Eat the '#'
+          const MCExpr *ImmVal;
+          SMLoc ExprLoc = getLoc();
+          if (getParser().parseExpression(ImmVal))
+            return true;
+          const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+          if (!MCE)
+            return TokError("immediate value expected for extend operand");
+
+          ExplicitShift = true;
+          ShiftVal = MCE->getValue();
+          if (ShiftVal < 0 || ShiftVal > 4)
+            return Error(ExprLoc, "immediate operand out of range");
+        } else
+          return Error(getLoc(), "expected immediate operand");
+      }
+
+      if (Parser.getTok().isNot(AsmToken::RBrac))
+        return Error(getLoc(), "']' expected");
+
+      Parser.Lex(); // Eat right bracket token.
+
+      SMLoc E = getLoc();
+      Operands.push_back(ARM64Operand::CreateRegOffsetMem(
+          Reg, Reg2, ExtOp, ShiftVal, ExplicitShift, S, E, getContext()));
+      return false;
+
+      // Immediate expressions.
+    } else if (Parser.getTok().is(AsmToken::Hash)) {
+      Parser.Lex(); // Eat hash token.
+
+      if (parseSymbolicImmVal(OffsetExpr))
+        return true;
+    } else {
+      // FIXME: We really should make sure that we're dealing with a LDR/STR
+      // instruction that can legally have a symbolic expression here.
+      // Symbol reference.
+      if (Parser.getTok().isNot(AsmToken::Identifier) &&
+          Parser.getTok().isNot(AsmToken::String))
+        return Error(getLoc(), "identifier or immediate expression expected");
+      if (getParser().parseExpression(OffsetExpr))
+        return true;
+      // If this is a plain ref, Make sure a legal variant kind was specified.
+      // Otherwise, it's a more complicated expression and we have to just
+      // assume it's OK and let the relocation stuff puke if it's not.
+      ARM64MCExpr::VariantKind ELFRefKind;
+      MCSymbolRefExpr::VariantKind DarwinRefKind;
+      const MCConstantExpr *Addend;
+      if (classifySymbolRef(OffsetExpr, ELFRefKind, DarwinRefKind, Addend) &&
+          Addend == 0) {
+        assert(ELFRefKind == ARM64MCExpr::VK_INVALID &&
+               "ELF symbol modifiers not supported here yet");
+
+        switch (DarwinRefKind) {
+        default:
+          return Error(getLoc(), "expected @pageoff or @gotpageoff modifier");
+        case MCSymbolRefExpr::VK_GOTPAGEOFF:
+        case MCSymbolRefExpr::VK_PAGEOFF:
+        case MCSymbolRefExpr::VK_TLVPPAGEOFF:
+          // These are what we're expecting.
+          break;
+        }
+      }
+    }
+  }
+
+  SMLoc E = getLoc();
+  if (Parser.getTok().isNot(AsmToken::RBrac))
+    return Error(E, "']' expected");
+
+  Parser.Lex(); // Eat right bracket token.
+
+  // Create the memory operand.
+  Operands.push_back(
+      ARM64Operand::CreateMem(Reg, OffsetExpr, S, E, OffsetLoc, getContext()));
+
+  // Check for a '!', indicating pre-indexed addressing with writeback.
+  if (Parser.getTok().is(AsmToken::Exclaim)) {
+    // There needs to have been an immediate or wback doesn't make sense.
+    if (!OffsetExpr)
+      return Error(E, "missing offset for pre-indexed addressing");
+    // Pre-indexed with writeback must have a constant expression for the
+    // offset. FIXME: Theoretically, we'd like to allow fixups so long
+    // as they don't require a relocation.
+    if (!isa<MCConstantExpr>(OffsetExpr))
+      return Error(OffsetLoc, "constant immediate expression expected");
+
+    // Create the Token operand for the '!'.
+    Operands.push_back(ARM64Operand::CreateToken(
+        "!", false, Parser.getTok().getLoc(), getContext()));
+    Parser.Lex(); // Eat the '!' token.
+  }
+
+  return false;
+}
+
+bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
+  bool HasELFModifier = false;
+  ARM64MCExpr::VariantKind RefKind;
+
+  if (Parser.getTok().is(AsmToken::Colon)) {
+    Parser.Lex(); // Eat ':"
+    HasELFModifier = true;
+
+    if (Parser.getTok().isNot(AsmToken::Identifier)) {
+      Error(Parser.getTok().getLoc(),
+            "expect relocation specifier in operand after ':'");
+      return true;
+    }
+
+    std::string LowerCase = Parser.getTok().getIdentifier().lower();
+    RefKind = StringSwitch<ARM64MCExpr::VariantKind>(LowerCase)
+                  .Case("lo12", ARM64MCExpr::VK_LO12)
+                  .Case("abs_g3", ARM64MCExpr::VK_ABS_G3)
+                  .Case("abs_g2", ARM64MCExpr::VK_ABS_G2)
+                  .Case("abs_g2_nc", ARM64MCExpr::VK_ABS_G2_NC)
+                  .Case("abs_g1", ARM64MCExpr::VK_ABS_G1)
+                  .Case("abs_g1_nc", ARM64MCExpr::VK_ABS_G1_NC)
+                  .Case("abs_g0", ARM64MCExpr::VK_ABS_G0)
+                  .Case("abs_g0_nc", ARM64MCExpr::VK_ABS_G0_NC)
+                  .Case("dtprel_g2", ARM64MCExpr::VK_DTPREL_G2)
+                  .Case("dtprel_g1", ARM64MCExpr::VK_DTPREL_G1)
+                  .Case("dtprel_g1_nc", ARM64MCExpr::VK_DTPREL_G1_NC)
+                  .Case("dtprel_g0", ARM64MCExpr::VK_DTPREL_G0)
+                  .Case("dtprel_g0_nc", ARM64MCExpr::VK_DTPREL_G0_NC)
+                  .Case("dtprel_lo12", ARM64MCExpr::VK_DTPREL_LO12)
+                  .Case("dtprel_lo12_nc", ARM64MCExpr::VK_DTPREL_LO12_NC)
+                  .Case("tprel_g2", ARM64MCExpr::VK_TPREL_G2)
+                  .Case("tprel_g1", ARM64MCExpr::VK_TPREL_G1)
+                  .Case("tprel_g1_nc", ARM64MCExpr::VK_TPREL_G1_NC)
+                  .Case("tprel_g0", ARM64MCExpr::VK_TPREL_G0)
+                  .Case("tprel_g0_nc", ARM64MCExpr::VK_TPREL_G0_NC)
+                  .Case("tprel_lo12", ARM64MCExpr::VK_TPREL_LO12)
+                  .Case("tprel_lo12_nc", ARM64MCExpr::VK_TPREL_LO12_NC)
+                  .Case("tlsdesc_lo12", ARM64MCExpr::VK_TLSDESC_LO12)
+                  .Case("got", ARM64MCExpr::VK_GOT_PAGE)
+                  .Case("got_lo12", ARM64MCExpr::VK_GOT_LO12)
+                  .Case("gottprel", ARM64MCExpr::VK_GOTTPREL_PAGE)
+                  .Case("gottprel_lo12", ARM64MCExpr::VK_GOTTPREL_LO12_NC)
+                  .Case("gottprel_g1", ARM64MCExpr::VK_GOTTPREL_G1)
+                  .Case("gottprel_g0_nc", ARM64MCExpr::VK_GOTTPREL_G0_NC)
+                  .Case("tlsdesc", ARM64MCExpr::VK_TLSDESC_PAGE)
+                  .Default(ARM64MCExpr::VK_INVALID);
+
+    if (RefKind == ARM64MCExpr::VK_INVALID) {
+      Error(Parser.getTok().getLoc(),
+            "expect relocation specifier in operand after ':'");
+      return true;
+    }
+
+    Parser.Lex(); // Eat identifier
+
+    if (Parser.getTok().isNot(AsmToken::Colon)) {
+      Error(Parser.getTok().getLoc(), "expect ':' after relocation specifier");
+      return true;
+    }
+    Parser.Lex(); // Eat ':'
+  }
+
+  if (getParser().parseExpression(ImmVal))
+    return true;
+
+  if (HasELFModifier)
+    ImmVal = ARM64MCExpr::Create(ImmVal, RefKind, getContext());
+
+  return false;
+}
+
+/// parseVectorList - Parse a vector list operand for AdvSIMD instructions.
+bool ARM64AsmParser::parseVectorList(OperandVector &Operands) {
+  assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket");
+  SMLoc S = getLoc();
+  Parser.Lex(); // Eat left bracket token.
+  StringRef Kind;
+  int64_t FirstReg = tryMatchVectorRegister(Kind);
+  if (FirstReg == -1)
+    return Error(getLoc(), "vector register expected");
+  int64_t PrevReg = FirstReg;
+  unsigned Count = 1;
+  while (Parser.getTok().isNot(AsmToken::RCurly)) {
+    if (Parser.getTok().is(AsmToken::EndOfStatement))
+      Error(getLoc(), "'}' expected");
+
+    if (Parser.getTok().isNot(AsmToken::Comma))
+      return Error(getLoc(), "',' expected");
+    Parser.Lex(); // Eat the comma token.
+
+    SMLoc Loc = getLoc();
+    StringRef NextKind;
+    int64_t Reg = tryMatchVectorRegister(NextKind);
+    if (Reg == -1)
+      return Error(Loc, "vector register expected");
+    // Any Kind suffices must match on all regs in the list.
+    if (Kind != NextKind)
+      return Error(Loc, "mismatched register size suffix");
+
+    // Registers must be incremental (with wraparound at 31)
+    if (getContext().getRegisterInfo()->getEncodingValue(Reg) !=
+        (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32)
+      return Error(Loc, "registers must be sequential");
+
+    PrevReg = Reg;
+    ++Count;
+  }
+  Parser.Lex(); // Eat the '}' token.
+
+  unsigned NumElements = 0;
+  char ElementKind = 0;
+  if (!Kind.empty())
+    parseValidVectorKind(Kind, NumElements, ElementKind);
+
+  Operands.push_back(ARM64Operand::CreateVectorList(
+      FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext()));
+
+  // If there is an index specifier following the list, parse that too.
+  if (Parser.getTok().is(AsmToken::LBrac)) {
+    SMLoc SIdx = getLoc();
+    Parser.Lex(); // Eat left bracket token.
+
+    const MCExpr *ImmVal;
+    if (getParser().parseExpression(ImmVal))
+      return MatchOperand_ParseFail;
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+    if (!MCE) {
+      TokError("immediate value expected for vector index");
+      return MatchOperand_ParseFail;
+    }
+
+    SMLoc E = getLoc();
+    if (Parser.getTok().isNot(AsmToken::RBrac)) {
+      Error(E, "']' expected");
+      return MatchOperand_ParseFail;
+    }
+
+    Parser.Lex(); // Eat right bracket token.
+
+    Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E,
+                                                       getContext()));
+  }
+  return false;
+}
+
+/// parseOperand - Parse a arm instruction operand.  For now this parses the
+/// operand regardless of the mnemonic.
+bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
+                                  bool invertCondCode) {
+  // Check if the current operand has a custom associated parser, if so, try to
+  // custom parse the operand, or fallback to the general approach.
+  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+  if (ResTy == MatchOperand_Success)
+    return false;
+  // If there wasn't a custom match, try the generic matcher below. Otherwise,
+  // there was a match, but an error occurred, in which case, just return that
+  // the operand parsing failed.
+  if (ResTy == MatchOperand_ParseFail)
+    return true;
+
+  // Nothing custom, so do general case parsing.
+  SMLoc S, E;
+  switch (getLexer().getKind()) {
+  default: {
+    SMLoc S = getLoc();
+    const MCExpr *Expr;
+    if (parseSymbolicImmVal(Expr))
+      return Error(S, "invalid operand");
+
+    SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+    Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
+    return false;
+  }
+  case AsmToken::LBrac:
+    return parseMemory(Operands);
+  case AsmToken::LCurly:
+    return parseVectorList(Operands);
+  case AsmToken::Identifier: {
+    // If we're expecting a Condition Code operand, then just parse that.
+    if (isCondCode)
+      return parseCondCode(Operands, invertCondCode);
+
+    // If it's a register name, parse it.
+    if (!parseRegister(Operands))
+      return false;
+
+    // This could be an optional "shift" operand.
+    if (!parseOptionalShift(Operands))
+      return false;
+
+    // Or maybe it could be an optional "extend" operand.
+    if (!parseOptionalExtend(Operands))
+      return false;
+
+    // This was not a register so parse other operands that start with an
+    // identifier (like labels) as expressions and create them as immediates.
+    const MCExpr *IdVal;
+    S = getLoc();
+    if (getParser().parseExpression(IdVal))
+      return true;
+
+    E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+    Operands.push_back(ARM64Operand::CreateImm(IdVal, S, E, getContext()));
+    return false;
+  }
+  case AsmToken::Hash: {
+    // #42 -> immediate.
+    S = getLoc();
+    Parser.Lex();
+
+    // The only Real that should come through here is a literal #0.0 for
+    // the fcmp[e] r, #0.0 instructions. They expect raw token operands,
+    // so convert the value.
+    const AsmToken &Tok = Parser.getTok();
+    if (Tok.is(AsmToken::Real)) {
+      APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+      uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+      if (IntVal != 0 || (Mnemonic != "fcmp" && Mnemonic != "fcmpe"))
+        return TokError("unexpected floating point literal");
+      Parser.Lex(); // Eat the token.
+
+      Operands.push_back(
+          ARM64Operand::CreateToken("#0", false, S, getContext()));
+      Operands.push_back(
+          ARM64Operand::CreateToken(".0", false, S, getContext()));
+      return false;
+    }
+
+    const MCExpr *ImmVal;
+    if (parseSymbolicImmVal(ImmVal))
+      return true;
+
+    E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+    Operands.push_back(ARM64Operand::CreateImm(ImmVal, S, E, getContext()));
+    return false;
+  }
+  }
+}
+
+/// ParseInstruction - Parse an ARM64 instruction mnemonic followed by its
+/// operands.
+bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+                                      StringRef Name, SMLoc NameLoc,
+                                      OperandVector &Operands) {
+  // Create the leading tokens for the mnemonic, split by '.' characters.
+  size_t Start = 0, Next = Name.find('.');
+  StringRef Head = Name.slice(Start, Next);
+
+  // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
+  if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi")
+    return parseSysAlias(Head, NameLoc, Operands);
+
+  Operands.push_back(
+      ARM64Operand::CreateToken(Head, false, NameLoc, getContext()));
+  Mnemonic = Head;
+
+  // Handle condition codes for a branch mnemonic
+  if (Head == "b" && Next != StringRef::npos) {
+    Start = Next;
+    Next = Name.find('.', Start + 1);
+    Head = Name.slice(Start + 1, Next);
+
+    SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+                                            (Head.data() - Name.data()));
+    unsigned CC = parseCondCodeString(Head);
+    if (CC == ~0U)
+      return Error(SuffixLoc, "invalid condition code");
+    const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext());
+    Operands.push_back(
+        ARM64Operand::CreateImm(CCExpr, NameLoc, NameLoc, getContext()));
+  }
+
+  // Add the remaining tokens in the mnemonic.
+  while (Next != StringRef::npos) {
+    Start = Next;
+    Next = Name.find('.', Start + 1);
+    Head = Name.slice(Start, Next);
+    SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+                                            (Head.data() - Name.data()) + 1);
+    Operands.push_back(
+        ARM64Operand::CreateToken(Head, true, SuffixLoc, getContext()));
+  }
+
+  // Conditional compare instructions have a Condition Code operand, which needs
+  // to be parsed and an immediate operand created.
+  bool condCodeFourthOperand =
+      (Head == "ccmp" || Head == "ccmn" || Head == "fccmp" ||
+       Head == "fccmpe" || Head == "fcsel" || Head == "csel" ||
+       Head == "csinc" || Head == "csinv" || Head == "csneg");
+
+  // These instructions are aliases to some of the conditional select
+  // instructions. However, the condition code is inverted in the aliased
+  // instruction.
+  //
+  // FIXME: Is this the correct way to handle these? Or should the parser
+  //        generate the aliased instructions directly?
+  bool condCodeSecondOperand = (Head == "cset" || Head == "csetm");
+  bool condCodeThirdOperand =
+      (Head == "cinc" || Head == "cinv" || Head == "cneg");
+
+  // Read the remaining operands.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    // Read the first operand.
+    if (parseOperand(Operands, false, false)) {
+      Parser.eatToEndOfStatement();
+      return true;
+    }
+
+    unsigned N = 2;
+    while (getLexer().is(AsmToken::Comma)) {
+      Parser.Lex(); // Eat the comma.
+
+      // Parse and remember the operand.
+      if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
+                                     (N == 3 && condCodeThirdOperand) ||
+                                     (N == 2 && condCodeSecondOperand),
+                       condCodeSecondOperand || condCodeThirdOperand)) {
+        Parser.eatToEndOfStatement();
+        return true;
+      }
+
+      ++N;
+    }
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    SMLoc Loc = Parser.getTok().getLoc();
+    Parser.eatToEndOfStatement();
+    return Error(Loc, "unexpected token in argument list");
+  }
+
+  Parser.Lex(); // Consume the EndOfStatement
+  return false;
+}
+
+/// isFPR32Register - Check if a register is in the FPR32 register class.
+/// (The parser does not have the target register info to check the register
+/// class directly.)
+static bool isFPR32Register(unsigned Reg) {
+  using namespace ARM64;
+  switch (Reg) {
+  default:
+    break;
+  case S0:  case S1:  case S2:  case S3:  case S4:  case S5:  case S6:
+  case S7:  case S8:  case S9:  case S10:  case S11:  case S12:  case S13:
+  case S14:  case S15:  case S16:  case S17:  case S18:  case S19:  case S20:
+  case S21:  case S22:  case S23:  case S24:  case S25:  case S26:  case S27:
+  case S28:  case S29:  case S30:  case S31:
+    return true;
+  }
+  return false;
+}
+
+/// isGPR32Register - Check if a register is in the GPR32sp register class.
+/// (The parser does not have the target register info to check the register
+/// class directly.)
+static bool isGPR32Register(unsigned Reg) {
+  using namespace ARM64;
+  switch (Reg) {
+  default:
+    break;
+  case W0:  case W1:  case W2:  case W3:  case W4:  case W5:  case W6:
+  case W7:  case W8:  case W9:  case W10:  case W11:  case W12:  case W13:
+  case W14:  case W15:  case W16:  case W17:  case W18:  case W19:  case W20:
+  case W21:  case W22:  case W23:  case W24:  case W25:  case W26:  case W27:
+  case W28:  case W29:  case W30:  case WSP:
+    return true;
+  }
+  return false;
+}
+
+static bool isGPR64Reg(unsigned Reg) {
+  using namespace ARM64;
+  switch (Reg) {
+  case X0:  case X1:  case X2:  case X3:  case X4:  case X5:  case X6:
+  case X7:  case X8:  case X9:  case X10:  case X11:  case X12:  case X13:
+  case X14:  case X15:  case X16:  case X17:  case X18:  case X19:  case X20:
+  case X21:  case X22:  case X23:  case X24:  case X25:  case X26:  case X27:
+  case X28:  case FP:  case LR:  case SP:  case XZR:
+    return true;
+  default:
+    return false;
+  }
+}
+
+
+// FIXME: This entire function is a giant hack to provide us with decent
+// operand range validation/diagnostics until TableGen/MC can be extended
+// to support autogeneration of this kind of validation.
+bool ARM64AsmParser::validateInstruction(MCInst &Inst,
+                                         SmallVectorImpl<SMLoc> &Loc) {
+  const MCRegisterInfo *RI = getContext().getRegisterInfo();
+  // Check for indexed addressing modes w/ the base register being the
+  // same as a destination/source register or pair load where
+  // the Rt == Rt2. All of those are undefined behaviour.
+  switch (Inst.getOpcode()) {
+  case ARM64::LDPSWpre:
+  case ARM64::LDPWpost:
+  case ARM64::LDPWpre:
+  case ARM64::LDPXpost:
+  case ARM64::LDPXpre: {
+    unsigned Rt = Inst.getOperand(0).getReg();
+    unsigned Rt2 = Inst.getOperand(1).getReg();
+    unsigned Rn = Inst.getOperand(2).getReg();
+    if (RI->isSubRegisterEq(Rn, Rt))
+      return Error(Loc[0], "unpredictable LDP instruction, writeback base "
+                           "is also a destination");
+    if (RI->isSubRegisterEq(Rn, Rt2))
+      return Error(Loc[1], "unpredictable LDP instruction, writeback base "
+                           "is also a destination");
+    // FALLTHROUGH
+  }
+  case ARM64::LDPDpost:
+  case ARM64::LDPDpre:
+  case ARM64::LDPQpost:
+  case ARM64::LDPQpre:
+  case ARM64::LDPSpost:
+  case ARM64::LDPSpre:
+  case ARM64::LDPSWpost:
+  case ARM64::LDPDi:
+  case ARM64::LDPQi:
+  case ARM64::LDPSi:
+  case ARM64::LDPSWi:
+  case ARM64::LDPWi:
+  case ARM64::LDPXi: {
+    unsigned Rt = Inst.getOperand(0).getReg();
+    unsigned Rt2 = Inst.getOperand(1).getReg();
+    if (Rt == Rt2)
+      return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt");
+    break;
+  }
+  case ARM64::STPDpost:
+  case ARM64::STPDpre:
+  case ARM64::STPQpost:
+  case ARM64::STPQpre:
+  case ARM64::STPSpost:
+  case ARM64::STPSpre:
+  case ARM64::STPWpost:
+  case ARM64::STPWpre:
+  case ARM64::STPXpost:
+  case ARM64::STPXpre: {
+    unsigned Rt = Inst.getOperand(0).getReg();
+    unsigned Rt2 = Inst.getOperand(1).getReg();
+    unsigned Rn = Inst.getOperand(2).getReg();
+    if (RI->isSubRegisterEq(Rn, Rt))
+      return Error(Loc[0], "unpredictable STP instruction, writeback base "
+                           "is also a source");
+    if (RI->isSubRegisterEq(Rn, Rt2))
+      return Error(Loc[1], "unpredictable STP instruction, writeback base "
+                           "is also a source");
+    break;
+  }
+  case ARM64::LDRBBpre:
+  case ARM64::LDRBpre:
+  case ARM64::LDRHHpre:
+  case ARM64::LDRHpre:
+  case ARM64::LDRSBWpre:
+  case ARM64::LDRSBXpre:
+  case ARM64::LDRSHWpre:
+  case ARM64::LDRSHXpre:
+  case ARM64::LDRSWpre:
+  case ARM64::LDRWpre:
+  case ARM64::LDRXpre:
+  case ARM64::LDRBBpost:
+  case ARM64::LDRBpost:
+  case ARM64::LDRHHpost:
+  case ARM64::LDRHpost:
+  case ARM64::LDRSBWpost:
+  case ARM64::LDRSBXpost:
+  case ARM64::LDRSHWpost:
+  case ARM64::LDRSHXpost:
+  case ARM64::LDRSWpost:
+  case ARM64::LDRWpost:
+  case ARM64::LDRXpost: {
+    unsigned Rt = Inst.getOperand(0).getReg();
+    unsigned Rn = Inst.getOperand(1).getReg();
+    if (RI->isSubRegisterEq(Rn, Rt))
+      return Error(Loc[0], "unpredictable LDR instruction, writeback base "
+                           "is also a source");
+    break;
+  }
+  case ARM64::STRBBpost:
+  case ARM64::STRBpost:
+  case ARM64::STRHHpost:
+  case ARM64::STRHpost:
+  case ARM64::STRWpost:
+  case ARM64::STRXpost:
+  case ARM64::STRBBpre:
+  case ARM64::STRBpre:
+  case ARM64::STRHHpre:
+  case ARM64::STRHpre:
+  case ARM64::STRWpre:
+  case ARM64::STRXpre: {
+    unsigned Rt = Inst.getOperand(0).getReg();
+    unsigned Rn = Inst.getOperand(1).getReg();
+    if (RI->isSubRegisterEq(Rn, Rt))
+      return Error(Loc[0], "unpredictable STR instruction, writeback base "
+                           "is also a source");
+    break;
+  }
+  }
+
+  // Now check immediate ranges. Separate from the above as there is overlap
+  // in the instructions being checked and this keeps the nested conditionals
+  // to a minimum.
+  switch (Inst.getOpcode()) {
+  case ARM64::ANDWrs:
+  case ARM64::ANDSWrs:
+  case ARM64::EORWrs:
+  case ARM64::ORRWrs: {
+    if (!Inst.getOperand(3).isImm())
+      return Error(Loc[3], "immediate value expected");
+    int64_t shifter = Inst.getOperand(3).getImm();
+    ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(shifter);
+    if (ST == ARM64_AM::LSL && shifter > 31)
+      return Error(Loc[3], "shift value out of range");
+    return false;
+  }
+  case ARM64::ADDSWri:
+  case ARM64::ADDSXri:
+  case ARM64::ADDWri:
+  case ARM64::ADDXri:
+  case ARM64::SUBSWri:
+  case ARM64::SUBSXri:
+  case ARM64::SUBWri:
+  case ARM64::SUBXri: {
+    if (!Inst.getOperand(3).isImm())
+      return Error(Loc[3], "immediate value expected");
+    int64_t shifter = Inst.getOperand(3).getImm();
+    if (shifter != 0 && shifter != 12)
+      return Error(Loc[3], "shift value out of range");
+    // The imm12 operand can be an expression. Validate that it's legit.
+    // FIXME: We really, really want to allow arbitrary expressions here
+    // and resolve the value and validate the result at fixup time, but
+    // that's hard as we have long since lost any source information we
+    // need to generate good diagnostics by that point.
+    if (Inst.getOpcode() == ARM64::ADDXri && Inst.getOperand(2).isExpr()) {
+      const MCExpr *Expr = Inst.getOperand(2).getExpr();
+      ARM64MCExpr::VariantKind ELFRefKind;
+      MCSymbolRefExpr::VariantKind DarwinRefKind;
+      const MCConstantExpr *Addend;
+      if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
+        return Error(Loc[2], "invalid immediate expression");
+      }
+
+      if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
+          DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF ||
+          ELFRefKind == ARM64MCExpr::VK_LO12 ||
+          ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 ||
+          ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC ||
+          ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 ||
+          ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC ||
+          ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) {
+        // Note that we don't range-check the addend. It's adjusted
+        // modulo page size when converted, so there is no "out of range"
+        // condition when using @pageoff. Any validity checking for the value
+        // was done in the is*() predicate function.
+        return false;
+      } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF) {
+        // @gotpageoff can only be used directly, not with an addend.
+        return Addend != 0;
+      }
+
+      // Otherwise, we're not sure, so don't allow it for now.
+      return Error(Loc[2], "invalid immediate expression");
+    }
+
+    // If it's anything but an immediate, it's not legit.
+    if (!Inst.getOperand(2).isImm())
+      return Error(Loc[2], "invalid immediate expression");
+    int64_t imm = Inst.getOperand(2).getImm();
+    if (imm > 4095 || imm < 0)
+      return Error(Loc[2], "immediate value out of range");
+    return false;
+  }
+  case ARM64::LDRBpre:
+  case ARM64::LDRHpre:
+  case ARM64::LDRSBWpre:
+  case ARM64::LDRSBXpre:
+  case ARM64::LDRSHWpre:
+  case ARM64::LDRSHXpre:
+  case ARM64::LDRWpre:
+  case ARM64::LDRXpre:
+  case ARM64::LDRSpre:
+  case ARM64::LDRDpre:
+  case ARM64::LDRQpre:
+  case ARM64::STRBpre:
+  case ARM64::STRHpre:
+  case ARM64::STRWpre:
+  case ARM64::STRXpre:
+  case ARM64::STRSpre:
+  case ARM64::STRDpre:
+  case ARM64::STRQpre:
+  case ARM64::LDRBpost:
+  case ARM64::LDRHpost:
+  case ARM64::LDRSBWpost:
+  case ARM64::LDRSBXpost:
+  case ARM64::LDRSHWpost:
+  case ARM64::LDRSHXpost:
+  case ARM64::LDRWpost:
+  case ARM64::LDRXpost:
+  case ARM64::LDRSpost:
+  case ARM64::LDRDpost:
+  case ARM64::LDRQpost:
+  case ARM64::STRBpost:
+  case ARM64::STRHpost:
+  case ARM64::STRWpost:
+  case ARM64::STRXpost:
+  case ARM64::STRSpost:
+  case ARM64::STRDpost:
+  case ARM64::STRQpost:
+  case ARM64::LDTRXi:
+  case ARM64::LDTRWi:
+  case ARM64::LDTRHi:
+  case ARM64::LDTRBi:
+  case ARM64::LDTRSHWi:
+  case ARM64::LDTRSHXi:
+  case ARM64::LDTRSBWi:
+  case ARM64::LDTRSBXi:
+  case ARM64::LDTRSWi:
+  case ARM64::STTRWi:
+  case ARM64::STTRXi:
+  case ARM64::STTRHi:
+  case ARM64::STTRBi:
+  case ARM64::LDURWi:
+  case ARM64::LDURXi:
+  case ARM64::LDURSi:
+  case ARM64::LDURDi:
+  case ARM64::LDURQi:
+  case ARM64::LDURHi:
+  case ARM64::LDURBi:
+  case ARM64::LDURSHWi:
+  case ARM64::LDURSHXi:
+  case ARM64::LDURSBWi:
+  case ARM64::LDURSBXi:
+  case ARM64::LDURSWi:
+  case ARM64::PRFUMi:
+  case ARM64::STURWi:
+  case ARM64::STURXi:
+  case ARM64::STURSi:
+  case ARM64::STURDi:
+  case ARM64::STURQi:
+  case ARM64::STURHi:
+  case ARM64::STURBi: {
+    // FIXME: Should accept expressions and error in fixup evaluation
+    // if out of range.
+    if (!Inst.getOperand(2).isImm())
+      return Error(Loc[1], "immediate value expected");
+    int64_t offset = Inst.getOperand(2).getImm();
+    if (offset > 255 || offset < -256)
+      return Error(Loc[1], "offset value out of range");
+    return false;
+  }
+  case ARM64::LDRSro:
+  case ARM64::LDRWro:
+  case ARM64::LDRSWro:
+  case ARM64::STRWro:
+  case ARM64::STRSro: {
+    // FIXME: Should accept expressions and error in fixup evaluation
+    // if out of range.
+    if (!Inst.getOperand(3).isImm())
+      return Error(Loc[1], "immediate value expected");
+    int64_t shift = Inst.getOperand(3).getImm();
+    ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
+    if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
+        type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
+      return Error(Loc[1], "shift type invalid");
+    return false;
+  }
+  case ARM64::LDRDro:
+  case ARM64::LDRQro:
+  case ARM64::LDRXro:
+  case ARM64::PRFMro:
+  case ARM64::STRXro:
+  case ARM64::STRDro:
+  case ARM64::STRQro: {
+    // FIXME: Should accept expressions and error in fixup evaluation
+    // if out of range.
+    if (!Inst.getOperand(3).isImm())
+      return Error(Loc[1], "immediate value expected");
+    int64_t shift = Inst.getOperand(3).getImm();
+    ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
+    if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
+        type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
+      return Error(Loc[1], "shift type invalid");
+    return false;
+  }
+  case ARM64::LDRHro:
+  case ARM64::LDRHHro:
+  case ARM64::LDRSHWro:
+  case ARM64::LDRSHXro:
+  case ARM64::STRHro:
+  case ARM64::STRHHro: {
+    // FIXME: Should accept expressions and error in fixup evaluation
+    // if out of range.
+    if (!Inst.getOperand(3).isImm())
+      return Error(Loc[1], "immediate value expected");
+    int64_t shift = Inst.getOperand(3).getImm();
+    ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
+    if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
+        type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
+      return Error(Loc[1], "shift type invalid");
+    return false;
+  }
+  case ARM64::LDRBro:
+  case ARM64::LDRBBro:
+  case ARM64::LDRSBWro:
+  case ARM64::LDRSBXro:
+  case ARM64::STRBro:
+  case ARM64::STRBBro: {
+    // FIXME: Should accept expressions and error in fixup evaluation
+    // if out of range.
+    if (!Inst.getOperand(3).isImm())
+      return Error(Loc[1], "immediate value expected");
+    int64_t shift = Inst.getOperand(3).getImm();
+    ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
+    if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
+        type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
+      return Error(Loc[1], "shift type invalid");
+    return false;
+  }
+  case ARM64::LDPWi:
+  case ARM64::LDPXi:
+  case ARM64::LDPSi:
+  case ARM64::LDPDi:
+  case ARM64::LDPQi:
+  case ARM64::LDPSWi:
+  case ARM64::STPWi:
+  case ARM64::STPXi:
+  case ARM64::STPSi:
+  case ARM64::STPDi:
+  case ARM64::STPQi:
+  case ARM64::LDPWpre:
+  case ARM64::LDPXpre:
+  case ARM64::LDPSpre:
+  case ARM64::LDPDpre:
+  case ARM64::LDPQpre:
+  case ARM64::LDPSWpre:
+  case ARM64::STPWpre:
+  case ARM64::STPXpre:
+  case ARM64::STPSpre:
+  case ARM64::STPDpre:
+  case ARM64::STPQpre:
+  case ARM64::LDPWpost:
+  case ARM64::LDPXpost:
+  case ARM64::LDPSpost:
+  case ARM64::LDPDpost:
+  case ARM64::LDPQpost:
+  case ARM64::LDPSWpost:
+  case ARM64::STPWpost:
+  case ARM64::STPXpost:
+  case ARM64::STPSpost:
+  case ARM64::STPDpost:
+  case ARM64::STPQpost:
+  case ARM64::LDNPWi:
+  case ARM64::LDNPXi:
+  case ARM64::LDNPSi:
+  case ARM64::LDNPDi:
+  case ARM64::LDNPQi:
+  case ARM64::STNPWi:
+  case ARM64::STNPXi:
+  case ARM64::STNPSi:
+  case ARM64::STNPDi:
+  case ARM64::STNPQi: {
+    // FIXME: Should accept expressions and error in fixup evaluation
+    // if out of range.
+    if (!Inst.getOperand(3).isImm())
+      return Error(Loc[2], "immediate value expected");
+    int64_t offset = Inst.getOperand(3).getImm();
+    if (offset > 63 || offset < -64)
+      return Error(Loc[2], "offset value out of range");
+    return false;
+  }
+  default:
+    return false;
+  }
+}
+
+static void rewriteMOV(ARM64AsmParser::OperandVector &Operands,
+                       StringRef mnemonic, uint64_t imm, unsigned shift,
+                       MCContext &Context) {
+  ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
+  ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
+  Operands[0] =
+      ARM64Operand::CreateToken(mnemonic, false, Op->getStartLoc(), Context);
+
+  const MCExpr *NewImm = MCConstantExpr::Create(imm >> shift, Context);
+  Operands[2] = ARM64Operand::CreateImm(NewImm, Op2->getStartLoc(),
+                                        Op2->getEndLoc(), Context);
+
+  Operands.push_back(ARM64Operand::CreateShifter(
+      ARM64_AM::LSL, shift, Op2->getStartLoc(), Op2->getEndLoc(), Context));
+  delete Op2;
+  delete Op;
+}
+
+bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
+  switch (ErrCode) {
+  case Match_MissingFeature:
+    return Error(Loc,
+                 "instruction requires a CPU feature not currently enabled");
+  case Match_InvalidOperand:
+    return Error(Loc, "invalid operand for instruction");
+  case Match_InvalidSuffix:
+    return Error(Loc, "invalid type suffix for instruction");
+  case Match_InvalidMemoryIndexedSImm9:
+    return Error(Loc, "index must be an integer in range [-256,255].");
+  case Match_InvalidMemoryIndexed32SImm7:
+    return Error(Loc, "index must be a multiple of 4 in range [-256,252].");
+  case Match_InvalidMemoryIndexed64SImm7:
+    return Error(Loc, "index must be a multiple of 8 in range [-512,504].");
+  case Match_InvalidMemoryIndexed128SImm7:
+    return Error(Loc, "index must be a multiple of 16 in range [-1024,1008].");
+  case Match_InvalidMemoryIndexed8:
+    return Error(Loc, "index must be an integer in range [0,4095].");
+  case Match_InvalidMemoryIndexed16:
+    return Error(Loc, "index must be a multiple of 2 in range [0,8190].");
+  case Match_InvalidMemoryIndexed32:
+    return Error(Loc, "index must be a multiple of 4 in range [0,16380].");
+  case Match_InvalidMemoryIndexed64:
+    return Error(Loc, "index must be a multiple of 8 in range [0,32760].");
+  case Match_InvalidMemoryIndexed128:
+    return Error(Loc, "index must be a multiple of 16 in range [0,65520].");
+  case Match_InvalidImm1_8:
+    return Error(Loc, "immediate must be an integer in range [1,8].");
+  case Match_InvalidImm1_16:
+    return Error(Loc, "immediate must be an integer in range [1,16].");
+  case Match_InvalidImm1_32:
+    return Error(Loc, "immediate must be an integer in range [1,32].");
+  case Match_InvalidImm1_64:
+    return Error(Loc, "immediate must be an integer in range [1,64].");
+  case Match_MnemonicFail:
+    return Error(Loc, "unrecognized instruction mnemonic");
+  default:
+    assert(0 && "unexpected error code!");
+    return Error(Loc, "invalid instruction format");
+  }
+}
+
+bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                             OperandVector &Operands,
+                                             MCStreamer &Out,
+                                             unsigned &ErrorInfo,
+                                             bool MatchingInlineAsm) {
+  assert(!Operands.empty() && "Unexpect empty operand list!");
+  ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
+  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
+  StringRef Tok = Op->getToken();
+  // Translate CMN/CMP pseudos to ADDS/SUBS with zero register destination.
+  // This needs to be done before the special handling of ADD/SUB immediates.
+  if (Tok == "cmp" || Tok == "cmn") {
+    // Replace the opcode with either ADDS or SUBS.
+    const char *Repl = StringSwitch<const char *>(Tok)
+                           .Case("cmp", "subs")
+                           .Case("cmn", "adds")
+                           .Default(0);
+    assert(Repl && "Unknown compare instruction");
+    delete Operands[0];
+    Operands[0] = ARM64Operand::CreateToken(Repl, false, IDLoc, getContext());
+
+    // Insert WZR or XZR as destination operand.
+    ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]);
+    unsigned ZeroReg;
+    if (RegOp->isReg() &&
+        (isGPR32Register(RegOp->getReg()) || RegOp->getReg() == ARM64::WZR))
+      ZeroReg = ARM64::WZR;
+    else
+      ZeroReg = ARM64::XZR;
+    Operands.insert(
+        Operands.begin() + 1,
+        ARM64Operand::CreateReg(ZeroReg, false, IDLoc, IDLoc, getContext()));
+    // Update since we modified it above.
+    ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
+    Tok = Op->getToken();
+  }
+
+  unsigned NumOperands = Operands.size();
+
+  if (Tok == "mov" && NumOperands == 3) {
+    // The MOV mnemomic is aliased to movn/movz, depending on the value of
+    // the immediate being instantiated.
+    // FIXME: Catching this here is a total hack, and we should use tblgen
+    // support to implement this instead as soon as it is available.
+
+    ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
+    if (Op2->isImm()) {
+      if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op2->getImm())) {
+        uint64_t Val = CE->getValue();
+        uint64_t NVal = ~Val;
+
+        // If this is a 32-bit register and the value has none of the upper
+        // set, clear the complemented upper 32-bits so the logic below works
+        // for 32-bit registers too.
+        ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
+        if (Op1->isReg() && isGPR32Register(Op1->getReg()) &&
+            (Val & 0xFFFFFFFFULL) == Val)
+          NVal &= 0x00000000FFFFFFFFULL;
+
+        // MOVK Rd, imm << 0
+        if ((Val & 0xFFFF) == Val)
+          rewriteMOV(Operands, "movz", Val, 0, getContext());
+
+        // MOVK Rd, imm << 16
+        else if ((Val & 0xFFFF0000ULL) == Val)
+          rewriteMOV(Operands, "movz", Val, 16, getContext());
+
+        // MOVK Rd, imm << 32
+        else if ((Val & 0xFFFF00000000ULL) == Val)
+          rewriteMOV(Operands, "movz", Val, 32, getContext());
+
+        // MOVK Rd, imm << 48
+        else if ((Val & 0xFFFF000000000000ULL) == Val)
+          rewriteMOV(Operands, "movz", Val, 48, getContext());
+
+        // MOVN Rd, (~imm << 0)
+        else if ((NVal & 0xFFFFULL) == NVal)
+          rewriteMOV(Operands, "movn", NVal, 0, getContext());
+
+        // MOVN Rd, ~(imm << 16)
+        else if ((NVal & 0xFFFF0000ULL) == NVal)
+          rewriteMOV(Operands, "movn", NVal, 16, getContext());
+
+        // MOVN Rd, ~(imm << 32)
+        else if ((NVal & 0xFFFF00000000ULL) == NVal)
+          rewriteMOV(Operands, "movn", NVal, 32, getContext());
+
+        // MOVN Rd, ~(imm << 48)
+        else if ((NVal & 0xFFFF000000000000ULL) == NVal)
+          rewriteMOV(Operands, "movn", NVal, 48, getContext());
+      }
+    }
+  } else if (NumOperands == 4) {
+    if (Tok == "add" || Tok == "adds" || Tok == "sub" || Tok == "subs") {
+      // Handle the uimm24 immediate form, where the shift is not specified.
+      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
+      if (Op3->isImm()) {
+        if (const MCConstantExpr *CE =
+                dyn_cast<MCConstantExpr>(Op3->getImm())) {
+          uint64_t Val = CE->getValue();
+          if (Val >= (1 << 24)) {
+            Error(IDLoc, "immediate value is too large");
+            return true;
+          }
+          if (Val < (1 << 12)) {
+            Operands.push_back(ARM64Operand::CreateShifter(
+                ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
+          } else if ((Val & 0xfff) == 0) {
+            delete Operands[3];
+            CE = MCConstantExpr::Create(Val >> 12, getContext());
+            Operands[3] =
+                ARM64Operand::CreateImm(CE, IDLoc, IDLoc, getContext());
+            Operands.push_back(ARM64Operand::CreateShifter(
+                ARM64_AM::LSL, 12, IDLoc, IDLoc, getContext()));
+          } else {
+            Error(IDLoc, "immediate value is too large");
+            return true;
+          }
+        } else {
+          Operands.push_back(ARM64Operand::CreateShifter(
+              ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
+        }
+      }
+
+      // FIXME: Horible hack to handle the LSL -> UBFM alias.
+    } else if (NumOperands == 4 && Tok == "lsl") {
+      ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
+      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
+      if (Op2->isReg() && Op3->isImm()) {
+        const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+        if (Op3CE) {
+          uint64_t Op3Val = Op3CE->getValue();
+          uint64_t NewOp3Val = 0;
+          uint64_t NewOp4Val = 0;
+          if (isGPR32Register(Op2->getReg()) || Op2->getReg() == ARM64::WZR) {
+            NewOp3Val = (32 - Op3Val) & 0x1f;
+            NewOp4Val = 31 - Op3Val;
+          } else {
+            NewOp3Val = (64 - Op3Val) & 0x3f;
+            NewOp4Val = 63 - Op3Val;
+          }
+
+          const MCExpr *NewOp3 =
+              MCConstantExpr::Create(NewOp3Val, getContext());
+          const MCExpr *NewOp4 =
+              MCConstantExpr::Create(NewOp4Val, getContext());
+
+          Operands[0] = ARM64Operand::CreateToken(
+              "ubfm", false, Op->getStartLoc(), getContext());
+          Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
+                                                Op3->getEndLoc(), getContext());
+          Operands.push_back(ARM64Operand::CreateImm(
+              NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
+          delete Op3;
+          delete Op;
+        }
+      }
+
+      // FIXME: Horrible hack to handle the optional LSL shift for vector
+      //        instructions.
+    } else if (NumOperands == 4 && (Tok == "bic" || Tok == "orr")) {
+      ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
+      ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
+      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
+      if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) ||
+          (Op1->isVectorReg() && Op2->isToken() && Op3->isImm()))
+        Operands.push_back(ARM64Operand::CreateShifter(ARM64_AM::LSL, 0, IDLoc,
+                                                       IDLoc, getContext()));
+    } else if (NumOperands == 4 && (Tok == "movi" || Tok == "mvni")) {
+      ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
+      ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
+      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
+      if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) ||
+          (Op1->isVectorReg() && Op2->isToken() && Op3->isImm())) {
+        StringRef Suffix = Op1->isToken() ? Op1->getToken() : Op2->getToken();
+        // Canonicalize on lower-case for ease of comparison.
+        std::string CanonicalSuffix = Suffix.lower();
+        if (Tok != "movi" ||
+            (CanonicalSuffix != ".1d" && CanonicalSuffix != ".2d" &&
+             CanonicalSuffix != ".8b" && CanonicalSuffix != ".16b"))
+          Operands.push_back(ARM64Operand::CreateShifter(
+              ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
+      }
+    }
+  } else if (NumOperands == 5) {
+    // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
+    // UBFIZ -> UBFM aliases.
+    if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") {
+      ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
+      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
+      ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]);
+
+      if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
+        const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+        const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+
+        if (Op3CE && Op4CE) {
+          uint64_t Op3Val = Op3CE->getValue();
+          uint64_t Op4Val = Op4CE->getValue();
+
+          uint64_t NewOp3Val = 0;
+          if (isGPR32Register(Op1->getReg()))
+            NewOp3Val = (32 - Op3Val) & 0x1f;
+          else
+            NewOp3Val = (64 - Op3Val) & 0x3f;
+
+          uint64_t NewOp4Val = Op4Val - 1;
+
+          const MCExpr *NewOp3 =
+              MCConstantExpr::Create(NewOp3Val, getContext());
+          const MCExpr *NewOp4 =
+              MCConstantExpr::Create(NewOp4Val, getContext());
+          Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
+                                                Op3->getEndLoc(), getContext());
+          Operands[4] = ARM64Operand::CreateImm(NewOp4, Op4->getStartLoc(),
+                                                Op4->getEndLoc(), getContext());
+          if (Tok == "bfi")
+            Operands[0] = ARM64Operand::CreateToken(
+                "bfm", false, Op->getStartLoc(), getContext());
+          else if (Tok == "sbfiz")
+            Operands[0] = ARM64Operand::CreateToken(
+                "sbfm", false, Op->getStartLoc(), getContext());
+          else if (Tok == "ubfiz")
+            Operands[0] = ARM64Operand::CreateToken(
+                "ubfm", false, Op->getStartLoc(), getContext());
+          else
+            llvm_unreachable("No valid mnemonic for alias?");
+
+          delete Op;
+          delete Op3;
+          delete Op4;
+        }
+      }
+
+      // FIXME: Horrible hack to handle the BFXIL->BFM, SBFX->SBFM, and
+      // UBFX -> UBFM aliases.
+    } else if (NumOperands == 5 &&
+               (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) {
+      ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
+      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
+      ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]);
+
+      if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
+        const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+        const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+
+        if (Op3CE && Op4CE) {
+          uint64_t Op3Val = Op3CE->getValue();
+          uint64_t Op4Val = Op4CE->getValue();
+          uint64_t NewOp4Val = Op3Val + Op4Val - 1;
+
+          if (NewOp4Val >= Op3Val) {
+            const MCExpr *NewOp4 =
+                MCConstantExpr::Create(NewOp4Val, getContext());
+            Operands[4] = ARM64Operand::CreateImm(
+                NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+            if (Tok == "bfxil")
+              Operands[0] = ARM64Operand::CreateToken(
+                  "bfm", false, Op->getStartLoc(), getContext());
+            else if (Tok == "sbfx")
+              Operands[0] = ARM64Operand::CreateToken(
+                  "sbfm", false, Op->getStartLoc(), getContext());
+            else if (Tok == "ubfx")
+              Operands[0] = ARM64Operand::CreateToken(
+                  "ubfm", false, Op->getStartLoc(), getContext());
+            else
+              llvm_unreachable("No valid mnemonic for alias?");
+
+            delete Op;
+            delete Op4;
+          }
+        }
+      }
+    }
+  }
+  // FIXME: Horrible hack for tbz and tbnz with Wn register operand.
+  //        InstAlias can't quite handle this since the reg classes aren't
+  //        subclasses.
+  if (NumOperands == 4 && (Tok == "tbz" || Tok == "tbnz")) {
+    ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]);
+    if (Op->isImm()) {
+      if (const MCConstantExpr *OpCE = dyn_cast<MCConstantExpr>(Op->getImm())) {
+        if (OpCE->getValue() < 32) {
+          // The source register can be Wn here, but the matcher expects a
+          // GPR64. Twiddle it here if necessary.
+          ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]);
+          if (Op->isReg()) {
+            unsigned Reg = getXRegFromWReg(Op->getReg());
+            Operands[1] = ARM64Operand::CreateReg(
+                Reg, false, Op->getStartLoc(), Op->getEndLoc(), getContext());
+            delete Op;
+          }
+        }
+      }
+    }
+  }
+  // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
+  //        InstAlias can't quite handle this since the reg classes aren't
+  //        subclasses.
+  if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) {
+    // The source register can be Wn here, but the matcher expects a
+    // GPR64. Twiddle it here if necessary.
+    ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]);
+    if (Op->isReg()) {
+      unsigned Reg = getXRegFromWReg(Op->getReg());
+      Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(),
+                                            Op->getEndLoc(), getContext());
+      delete Op;
+    }
+  }
+  // FIXME: Likewise for [su]xt[bh] with a Xd dst operand
+  else if (NumOperands == 3 &&
+           (Tok == "sxtb" || Tok == "uxtb" || Tok == "sxth" || Tok == "uxth")) {
+    ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]);
+    if (Op->isReg() && isGPR64Reg(Op->getReg())) {
+      // The source register can be Wn here, but the matcher expects a
+      // GPR64. Twiddle it here if necessary.
+      ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]);
+      if (Op->isReg()) {
+        unsigned Reg = getXRegFromWReg(Op->getReg());
+        Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(),
+                                              Op->getEndLoc(), getContext());
+        delete Op;
+      }
+    }
+  }
+
+  // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
+  if (NumOperands == 3 && Tok == "fmov") {
+    ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]);
+    ARM64Operand *ImmOp = static_cast<ARM64Operand *>(Operands[2]);
+    if (RegOp->isReg() && ImmOp->isFPImm() &&
+        ImmOp->getFPImm() == (unsigned)-1) {
+      unsigned zreg =
+          isFPR32Register(RegOp->getReg()) ? ARM64::WZR : ARM64::XZR;
+      Operands[2] = ARM64Operand::CreateReg(zreg, false, Op->getStartLoc(),
+                                            Op->getEndLoc(), getContext());
+      delete ImmOp;
+    }
+  }
+
+  // FIXME: Horrible hack to handle the literal .d[1] vector index on
+  // FMOV instructions. The index isn't an actual instruction operand
+  // but rather syntactic sugar. It really should be part of the mnemonic,
+  // not the operand, but whatever.
+  if ((NumOperands == 5) && Tok == "fmov") {
+    // If the last operand is a vectorindex of '1', then replace it with
+    // a '[' '1' ']' token sequence, which is what the matcher
+    // (annoyingly) expects for a literal vector index operand.
+    ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[NumOperands - 1]);
+    if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) {
+      SMLoc Loc = Op->getStartLoc();
+      Operands.pop_back();
+      Operands.push_back(
+          ARM64Operand::CreateToken("[", false, Loc, getContext()));
+      Operands.push_back(
+          ARM64Operand::CreateToken("1", false, Loc, getContext()));
+      Operands.push_back(
+          ARM64Operand::CreateToken("]", false, Loc, getContext()));
+    } else if (Op->isReg()) {
+      // Similarly, check the destination operand for the GPR->High-lane
+      // variant.
+      unsigned OpNo = NumOperands - 2;
+      ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[OpNo]);
+      if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) {
+        SMLoc Loc = Op->getStartLoc();
+        Operands[OpNo] =
+            ARM64Operand::CreateToken("[", false, Loc, getContext());
+        Operands.insert(
+            Operands.begin() + OpNo + 1,
+            ARM64Operand::CreateToken("1", false, Loc, getContext()));
+        Operands.insert(
+            Operands.begin() + OpNo + 2,
+            ARM64Operand::CreateToken("]", false, Loc, getContext()));
+      }
+    }
+  }
+
+  MCInst Inst;
+  // First try to match against the secondary set of tables containing the
+  // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2").
+  unsigned MatchResult =
+      MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1);
+
+  // If that fails, try against the alternate table containing long-form NEON:
+  // "fadd v0.2s, v1.2s, v2.2s"
+  if (MatchResult != Match_Success)
+    MatchResult =
+        MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0);
+
+  switch (MatchResult) {
+  case Match_Success: {
+    // Perform range checking and other semantic validations
+    SmallVector<SMLoc, 8> OperandLocs;
+    NumOperands = Operands.size();
+    for (unsigned i = 1; i < NumOperands; ++i)
+      OperandLocs.push_back(Operands[i]->getStartLoc());
+    if (validateInstruction(Inst, OperandLocs))
+      return true;
+
+    Inst.setLoc(IDLoc);
+    Out.EmitInstruction(Inst, STI);
+    return false;
+  }
+  case Match_MissingFeature:
+  case Match_MnemonicFail:
+    return showMatchError(IDLoc, MatchResult);
+  case Match_InvalidOperand: {
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc())
+        ErrorLoc = IDLoc;
+    }
+    // If the match failed on a suffix token operand, tweak the diagnostic
+    // accordingly.
+    if (((ARM64Operand *)Operands[ErrorInfo])->isToken() &&
+        ((ARM64Operand *)Operands[ErrorInfo])->isTokenSuffix())
+      MatchResult = Match_InvalidSuffix;
+
+    return showMatchError(ErrorLoc, MatchResult);
+  }
+  case Match_InvalidMemoryIndexedSImm9: {
+    // If there is not a '!' after the memory operand that failed, we really
+    // want the diagnostic for the non-pre-indexed instruction variant instead.
+    // Be careful to check for the post-indexed variant as well, which also
+    // uses this match diagnostic. Also exclude the explicitly unscaled
+    // mnemonics, as they want the unscaled diagnostic as well.
+    if (Operands.size() == ErrorInfo + 1 &&
+        !((ARM64Operand *)Operands[ErrorInfo])->isImm() &&
+        !Tok.startswith("stur") && !Tok.startswith("ldur")) {
+      // whether we want an Indexed64 or Indexed32 diagnostic depends on
+      // the register class of the previous operand. Default to 64 in case
+      // we see something unexpected.
+      MatchResult = Match_InvalidMemoryIndexed64;
+      if (ErrorInfo) {
+        ARM64Operand *PrevOp = (ARM64Operand *)Operands[ErrorInfo - 1];
+        if (PrevOp->isReg() && ARM64MCRegisterClasses[ARM64::GPR32RegClassID]
+                                   .contains(PrevOp->getReg()))
+          MatchResult = Match_InvalidMemoryIndexed32;
+      }
+    }
+    SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc();
+    if (ErrorLoc == SMLoc())
+      ErrorLoc = IDLoc;
+    return showMatchError(ErrorLoc, MatchResult);
+  }
+  case Match_InvalidMemoryIndexed32:
+  case Match_InvalidMemoryIndexed64:
+  case Match_InvalidMemoryIndexed128:
+    // If there is a '!' after the memory operand that failed, we really
+    // want the diagnostic for the pre-indexed instruction variant instead.
+    if (Operands.size() > ErrorInfo + 1 &&
+        ((ARM64Operand *)Operands[ErrorInfo + 1])->isTokenEqual("!"))
+      MatchResult = Match_InvalidMemoryIndexedSImm9;
+  // FALL THROUGH
+  case Match_InvalidMemoryIndexed8:
+  case Match_InvalidMemoryIndexed16:
+  case Match_InvalidMemoryIndexed32SImm7:
+  case Match_InvalidMemoryIndexed64SImm7:
+  case Match_InvalidMemoryIndexed128SImm7:
+  case Match_InvalidImm1_8:
+  case Match_InvalidImm1_16:
+  case Match_InvalidImm1_32:
+  case Match_InvalidImm1_64: {
+    // Any time we get here, there's nothing fancy to do. Just get the
+    // operand SMLoc and display the diagnostic.
+    SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc();
+    // If it's a memory operand, the error is with the offset immediate,
+    // so get that location instead.
+    if (((ARM64Operand *)Operands[ErrorInfo])->isMem())
+      ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getOffsetLoc();
+    if (ErrorLoc == SMLoc())
+      ErrorLoc = IDLoc;
+    return showMatchError(ErrorLoc, MatchResult);
+  }
+  }
+
+  llvm_unreachable("Implement any new match types added!");
+  return true;
+}
+
+/// ParseDirective parses the arm specific directives
+bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getIdentifier();
+  SMLoc Loc = DirectiveID.getLoc();
+  if (IDVal == ".hword")
+    return parseDirectiveWord(2, Loc);
+  if (IDVal == ".word")
+    return parseDirectiveWord(4, Loc);
+  if (IDVal == ".xword")
+    return parseDirectiveWord(8, Loc);
+  if (IDVal == ".tlsdesccall")
+    return parseDirectiveTLSDescCall(Loc);
+
+  return parseDirectiveLOH(IDVal, Loc);
+}
+
+/// parseDirectiveWord
+///  ::= .word [ expression (, expression)* ]
+bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      const MCExpr *Value;
+      if (getParser().parseExpression(Value))
+        return true;
+
+      getParser().getStreamer().EmitValue(Value, Size);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      // FIXME: Improve diagnostic.
+      if (getLexer().isNot(AsmToken::Comma))
+        return Error(L, "unexpected token in directive");
+      Parser.Lex();
+    }
+  }
+
+  Parser.Lex();
+  return false;
+}
+
+// parseDirectiveTLSDescCall:
+//   ::= .tlsdesccall symbol
+bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
+  StringRef Name;
+  if (getParser().parseIdentifier(Name))
+    return Error(L, "expected symbol after directive");
+
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+  Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_TLSDESC, getContext());
+
+  MCInst Inst;
+  Inst.setOpcode(ARM64::TLSDESCCALL);
+  Inst.addOperand(MCOperand::CreateExpr(Expr));
+
+  getParser().getStreamer().EmitInstruction(Inst, STI);
+  return false;
+}
+
+/// ::= .loh <lohName | lohId> label1, ..., labelN
+/// The number of arguments depends on the loh identifier.
+bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
+  if (IDVal != MCLOHDirectiveName())
+    return true;
+  MCLOHType Kind;
+  if (getParser().getTok().isNot(AsmToken::Identifier)) {
+    if (getParser().getTok().isNot(AsmToken::Integer))
+      return TokError("expected an identifier or a number in directive");
+    // We successfully get a numeric value for the identifier.
+    // Check if it is valid.
+    int64_t Id = getParser().getTok().getIntVal();
+    Kind = (MCLOHType)Id;
+    // Check that Id does not overflow MCLOHType.
+    if (!isValidMCLOHType(Kind) || Id != Kind)
+      return TokError("invalid numeric identifier in directive");
+  } else {
+    StringRef Name = getTok().getIdentifier();
+    // We successfully parse an identifier.
+    // Check if it is a recognized one.
+    int Id = MCLOHNameToId(Name);
+
+    if (Id == -1)
+      return TokError("invalid identifier in directive");
+    Kind = (MCLOHType)Id;
+  }
+  // Consume the identifier.
+  Lex();
+  // Get the number of arguments of this LOH.
+  int NbArgs = MCLOHIdToNbArgs(Kind);
+
+  assert(NbArgs != -1 && "Invalid number of arguments");
+
+  SmallVector<MCSymbol *, 3> Args;
+  for (int Idx = 0; Idx < NbArgs; ++Idx) {
+    StringRef Name;
+    if (getParser().parseIdentifier(Name))
+      return TokError("expected identifier in directive");
+    Args.push_back(getContext().GetOrCreateSymbol(Name));
+
+    if (Idx + 1 == NbArgs)
+      break;
+    if (getLexer().isNot(AsmToken::Comma))
+      return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+    Lex();
+  }
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+
+  getStreamer().EmitLOHDirective((MCLOHType)Kind, Args);
+  return false;
+}
+
+bool
+ARM64AsmParser::classifySymbolRef(const MCExpr *Expr,
+                                  ARM64MCExpr::VariantKind &ELFRefKind,
+                                  MCSymbolRefExpr::VariantKind &DarwinRefKind,
+                                  const MCConstantExpr *&Addend) {
+  ELFRefKind = ARM64MCExpr::VK_INVALID;
+  DarwinRefKind = MCSymbolRefExpr::VK_None;
+
+  if (const ARM64MCExpr *AE = dyn_cast<ARM64MCExpr>(Expr)) {
+    ELFRefKind = AE->getKind();
+    Expr = AE->getSubExpr();
+  }
+
+  const MCSymbolRefExpr *SE = dyn_cast<MCSymbolRefExpr>(Expr);
+  if (SE) {
+    // It's a simple symbol reference with no addend.
+    DarwinRefKind = SE->getKind();
+    Addend = 0;
+    return true;
+  }
+
+  const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
+  if (!BE)
+    return false;
+
+  SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+  if (!SE)
+    return false;
+  DarwinRefKind = SE->getKind();
+
+  if (BE->getOpcode() != MCBinaryExpr::Add)
+    return false;
+
+  // See if the addend is is a constant, otherwise there's more going
+  // on here than we can deal with.
+  Addend = dyn_cast<MCConstantExpr>(BE->getRHS());
+  if (!Addend)
+    return false;
+
+  // It's some symbol reference + a constant addend, but really
+  // shouldn't use both Darwin and ELF syntax.
+  return ELFRefKind == ARM64MCExpr::VK_INVALID ||
+         DarwinRefKind == MCSymbolRefExpr::VK_None;
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeARM64AsmParser() {
+  RegisterMCAsmParser<ARM64AsmParser> X(TheARM64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARM64GenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned ARM64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+                                                    unsigned Kind) {
+  ARM64Operand *Op = static_cast<ARM64Operand *>(AsmOp);
+  // If the kind is a token for a literal immediate, check if our asm
+  // operand matches. This is for InstAliases which have a fixed-value
+  // immediate in the syntax.
+  int64_t ExpectedVal;
+  switch (Kind) {
+  default:
+    return Match_InvalidOperand;
+  case MCK__35_0:
+    ExpectedVal = 0;
+    break;
+  case MCK__35_1:
+    ExpectedVal = 1;
+    break;
+  case MCK__35_12:
+    ExpectedVal = 12;
+    break;
+  case MCK__35_16:
+    ExpectedVal = 16;
+    break;
+  case MCK__35_2:
+    ExpectedVal = 2;
+    break;
+  case MCK__35_24:
+    ExpectedVal = 24;
+    break;
+  case MCK__35_3:
+    ExpectedVal = 3;
+    break;
+  case MCK__35_32:
+    ExpectedVal = 32;
+    break;
+  case MCK__35_4:
+    ExpectedVal = 4;
+    break;
+  case MCK__35_48:
+    ExpectedVal = 48;
+    break;
+  case MCK__35_6:
+    ExpectedVal = 6;
+    break;
+  case MCK__35_64:
+    ExpectedVal = 64;
+    break;
+  case MCK__35_8:
+    ExpectedVal = 8;
+    break;
+  }
+  if (!Op->isImm())
+    return Match_InvalidOperand;
+  const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+  if (!CE)
+    return Match_InvalidOperand;
+  if (CE->getValue() == ExpectedVal)
+    return Match_Success;
+  return Match_InvalidOperand;
+}

Added: llvm/trunk/lib/Target/ARM64/AsmParser/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/AsmParser/CMakeLists.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/AsmParser/CMakeLists.txt (added)
+++ llvm/trunk/lib/Target/ARM64/AsmParser/CMakeLists.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARM64AsmParser
+  ARM64AsmParser.cpp
+  )
+

Added: llvm/trunk/lib/Target/ARM64/AsmParser/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/AsmParser/LLVMBuild.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/AsmParser/LLVMBuild.txt (added)
+++ llvm/trunk/lib/Target/ARM64/AsmParser/LLVMBuild.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,24 @@
+;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARM64AsmParser
+parent = ARM64
+required_libraries = ARM64Desc ARM64Info MC MCParser Support
+add_to_library_groups = ARM64
+

Added: llvm/trunk/lib/Target/ARM64/AsmParser/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/AsmParser/Makefile?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/AsmParser/Makefile (added)
+++ llvm/trunk/lib/Target/ARM64/AsmParser/Makefile Sat Mar 29 05:18:08 2014
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARM64AsmParser
+
+# Hack: we need to include 'main' ARM target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common

Added: llvm/trunk/lib/Target/ARM64/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/CMakeLists.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/CMakeLists.txt (added)
+++ llvm/trunk/lib/Target/ARM64/CMakeLists.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,50 @@
+set(LLVM_TARGET_DEFINITIONS ARM64.td)
+
+tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel)
+tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler)
+add_public_tablegen_target(ARM64CommonTableGen)
+
+add_llvm_target(ARM64CodeGen
+  ARM64AddressTypePromotion.cpp
+  ARM64AdvSIMDScalarPass.cpp
+  ARM64AsmPrinter.cpp
+  ARM64BranchRelaxation.cpp
+  ARM64CleanupLocalDynamicTLSPass.cpp
+  ARM64CollectLOH.cpp
+  ARM64ConditionalCompares.cpp
+  ARM64DeadRegisterDefinitionsPass.cpp
+  ARM64ExpandPseudoInsts.cpp
+  ARM64FastISel.cpp
+  ARM64FrameLowering.cpp
+  ARM64ISelDAGToDAG.cpp
+  ARM64ISelLowering.cpp
+  ARM64InstrInfo.cpp
+  ARM64LoadStoreOptimizer.cpp
+  ARM64MCInstLower.cpp
+  ARM64PromoteConstant.cpp
+  ARM64RegisterInfo.cpp
+  ARM64SelectionDAGInfo.cpp
+  ARM64StorePairSuppress.cpp
+  ARM64Subtarget.cpp
+  ARM64TargetMachine.cpp
+  ARM64TargetObjectFile.cpp
+  ARM64TargetTransformInfo.cpp
+)
+
+add_dependencies(LLVMARM64CodeGen intrinsics_gen)
+
+add_subdirectory(TargetInfo)
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)

Added: llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,2142 @@
+//===- ARM64Disassembler.cpp - Disassembler for ARM64 -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm64-disassembler"
+
+#include "ARM64Disassembler.h"
+#include "ARM64Subtarget.h"
+#include "MCTargetDesc/ARM64BaseInfo.h"
+#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+// Pull DecodeStatus and its enum values into the global namespace.
+typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
+
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
+static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
+                                              unsigned RegNo, uint64_t Address,
+                                              const void *Decoder);
+static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder);
+static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder);
+static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder);
+static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder);
+static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder);
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder);
+static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst,
+                                               unsigned RegNo, uint64_t Address,
+                                               const void *Decoder);
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder);
+static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst,
+                                               unsigned RegNo, uint64_t Address,
+                                               const void *Decoder);
+static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                          uint64_t Address,
+                                          const void *Decoder);
+static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                           uint64_t Address,
+                                           const void *Decoder);
+static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder);
+static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                          uint64_t Address,
+                                          const void *Decoder);
+static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                           uint64_t Address,
+                                           const void *Decoder);
+static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder);
+
+static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm,
+                                             uint64_t Address,
+                                             const void *Decoder);
+static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm,
+                                           uint64_t Address,
+                                           const void *Decoder);
+static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
+                                                   uint32_t insn,
+                                                   uint64_t Address,
+                                                   const void *Decoder);
+static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                             uint64_t Address,
+                                             const void *Decoder);
+static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
+                                                  uint32_t insn,
+                                                  uint64_t Address,
+                                                  const void *Decoder);
+static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Address,
+                                                const void *Decoder);
+static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
+                                                   uint32_t insn,
+                                                   uint64_t Address,
+                                                   const void *Decoder);
+static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                              uint64_t Address,
+                                              const void *Decoder);
+static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst,
+                                                   uint32_t insn,
+                                                   uint64_t Address,
+                                                   const void *Decoder);
+static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Address,
+                                                const void *Decoder);
+static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Address,
+                                                const void *Decoder);
+static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                            uint64_t Address,
+                                            const void *Decoder);
+static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Address,
+                                                const void *Decoder);
+static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                         uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+                                        uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
+                                              uint64_t Address,
+                                              const void *Decoder);
+static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Address,
+                                                const void *Decoder);
+static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
+                                        uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn,
+                                       uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn,
+                                         uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn,
+                                             uint64_t Addr,
+                                             const void *Decoder);
+
+static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr,
+                                               const void *Decoder);
+static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr,
+                                               const void *Decoder);
+static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr,
+                                               const void *Decoder);
+static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
+                                        uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
+                                        uint64_t Addr, const void *Decoder);
+
+#include "ARM64GenDisassemblerTables.inc"
+#include "ARM64GenInstrInfo.inc"
+
+using namespace llvm;
+
+#define Success llvm::MCDisassembler::Success
+#define Fail llvm::MCDisassembler::Fail
+
+static MCDisassembler *createARM64Disassembler(const Target &T,
+                                               const MCSubtargetInfo &STI) {
+  return new ARM64Disassembler(STI);
+}
+
+DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+                                               const MemoryObject &Region,
+                                               uint64_t Address,
+                                               raw_ostream &os,
+                                               raw_ostream &cs) const {
+  CommentStream = &cs;
+
+  uint8_t bytes[4];
+
+  // We want to read exactly 4 bytes of data.
+  if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1)
+    return Fail;
+
+  // Encoded as a small-endian 32-bit word in the stream.
+  uint32_t insn =
+      (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0);
+
+  // Calling the auto-generated decoder function.
+  DecodeStatus result =
+      decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
+  if (!result)
+    return Fail;
+
+  Size = 4;
+
+  return Success;
+}
+
+static MCSymbolRefExpr::VariantKind
+getVariant(uint64_t LLVMDisassembler_VariantKind) {
+  switch (LLVMDisassembler_VariantKind) {
+  case LLVMDisassembler_VariantKind_None:
+    return MCSymbolRefExpr::VK_None;
+  case LLVMDisassembler_VariantKind_ARM64_PAGE:
+    return MCSymbolRefExpr::VK_PAGE;
+  case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
+    return MCSymbolRefExpr::VK_PAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
+    return MCSymbolRefExpr::VK_GOTPAGE;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
+    return MCSymbolRefExpr::VK_GOTPAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_TLVP:
+  case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
+  default:
+    assert("bad LLVMDisassembler_VariantKind");
+    return MCSymbolRefExpr::VK_None;
+  }
+}
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
+/// operand in place of the immediate Value in the MCInst.  The immediate
+/// Value has not had any PC adjustment made by the caller. If the instruction
+/// is a branch that adds the PC to the immediate Value then isBranch is
+/// Success, else Fail.  If the getOpInfo() function was set as part of the
+/// setupForSymbolicDisassembly() call then that function is called to get any
+/// symbolic information at the Address for this instrution.  If that returns
+/// non-zero then the symbolic information it returns is used to create an
+/// MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
+/// returns zero and isBranch is Success then a symbol look up for
+/// Address + Value is done and if a symbol is found an MCExpr is created with
+/// that, else an MCExpr with Address + Value is created.  If getOpInfo()
+/// returns zero and isBranch is Fail then the the Opcode of the MCInst is
+/// tested and for ADRP an other instructions that help to load of pointers
+/// a symbol look up is done to see it is returns a specific reference type
+/// to add to the comment stream.  This function returns Success if it adds
+/// an operand to the MCInst and Fail otherwise.
+bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value,
+                                                 bool isBranch,
+                                                 uint64_t InstSize, MCInst &MI,
+                                                 uint32_t insn) const {
+  LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback();
+
+  struct LLVMOpInfo1 SymbolicOp;
+  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+  SymbolicOp.Value = Value;
+  void *DisInfo = getDisInfoBlock();
+  uint64_t ReferenceType;
+  const char *ReferenceName;
+  const char *Name;
+  LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback();
+  if (!getOpInfo ||
+      !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+    if (isBranch) {
+      if (SymbolLookUp) {
+        ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+        Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+                            &ReferenceName);
+        if (Name) {
+          SymbolicOp.AddSymbol.Name = Name;
+          SymbolicOp.AddSymbol.Present = Success;
+          SymbolicOp.Value = 0;
+        } else {
+          SymbolicOp.Value = Address + Value;
+        }
+        if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+          (*CommentStream) << "symbol stub for: " << ReferenceName;
+        else if (ReferenceType ==
+                 LLVMDisassembler_ReferenceType_Out_Objc_Message)
+          (*CommentStream) << "Objc message: " << ReferenceName;
+      } else {
+        return false;
+      }
+    } else if (MI.getOpcode() == ARM64::ADRP) {
+      if (SymbolLookUp) {
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
+        Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
+                            &ReferenceName);
+        (*CommentStream) << format("0x%llx",
+                                   0xfffffffffffff000LL & (Address + Value));
+      } else {
+        return false;
+      }
+    } else if (MI.getOpcode() == ARM64::ADDXri ||
+               MI.getOpcode() == ARM64::LDRXui ||
+               MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) {
+      if (SymbolLookUp) {
+        if (MI.getOpcode() == ARM64::ADDXri)
+          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
+        else if (MI.getOpcode() == ARM64::LDRXui)
+          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
+        if (MI.getOpcode() == ARM64::LDRXl) {
+          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
+          Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+                              &ReferenceName);
+        } else if (MI.getOpcode() == ARM64::ADR) {
+          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
+          Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+                              &ReferenceName);
+        } else {
+          Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
+                              &ReferenceName);
+        }
+        if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
+          (*CommentStream) << "literal pool symbol address: " << ReferenceName;
+        else if (ReferenceType ==
+                 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+          (*CommentStream) << "literal pool for: \"" << ReferenceName << "\"";
+        else if (ReferenceType ==
+                 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
+          (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\"";
+        else if (ReferenceType ==
+                 LLVMDisassembler_ReferenceType_Out_Objc_Message)
+          (*CommentStream) << "Objc message: " << ReferenceName;
+        else if (ReferenceType ==
+                 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
+          (*CommentStream) << "Objc message ref: " << ReferenceName;
+        else if (ReferenceType ==
+                 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
+          (*CommentStream) << "Objc selector ref: " << ReferenceName;
+        else if (ReferenceType ==
+                 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
+          (*CommentStream) << "Objc class ref: " << ReferenceName;
+        // For these instructions, the SymbolLookUp() above is just to get the
+        // ReferenceType and ReferenceName.  We want to make sure not to
+        // fall through so we don't build an MCExpr to leave the disassembly
+        // of the immediate values of these instructions to the InstPrinter.
+        return false;
+      } else {
+        return false;
+      }
+    } else {
+      return false;
+    }
+  }
+
+  MCContext *Ctx = getMCContext();
+  const MCExpr *Add = NULL;
+  if (SymbolicOp.AddSymbol.Present) {
+    if (SymbolicOp.AddSymbol.Name) {
+      StringRef Name(SymbolicOp.AddSymbol.Name);
+      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+      MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
+      if (Variant != MCSymbolRefExpr::VK_None)
+        Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx);
+      else
+        Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+    } else {
+      Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+    }
+  }
+
+  const MCExpr *Sub = NULL;
+  if (SymbolicOp.SubtractSymbol.Present) {
+    if (SymbolicOp.SubtractSymbol.Name) {
+      StringRef Name(SymbolicOp.SubtractSymbol.Name);
+      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+      Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+    } else {
+      Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+    }
+  }
+
+  const MCExpr *Off = NULL;
+  if (SymbolicOp.Value != 0)
+    Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+  const MCExpr *Expr;
+  if (Sub) {
+    const MCExpr *LHS;
+    if (Add)
+      LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+    else
+      LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+    else
+      Expr = LHS;
+  } else if (Add) {
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+    else
+      Expr = Add;
+  } else {
+    if (Off != 0)
+      Expr = Off;
+    else
+      Expr = MCConstantExpr::Create(0, *Ctx);
+  }
+
+  MI.addOperand(MCOperand::CreateExpr(Expr));
+
+  return true;
+}
+
+extern "C" void LLVMInitializeARM64Disassembler() {
+  TargetRegistry::RegisterMCDisassembler(TheARM64Target,
+                                         createARM64Disassembler);
+}
+
+static const unsigned FPR128DecoderTable[] = {
+  ARM64::Q0,  ARM64::Q1,  ARM64::Q2,  ARM64::Q3,  ARM64::Q4,  ARM64::Q5,
+  ARM64::Q6,  ARM64::Q7,  ARM64::Q8,  ARM64::Q9,  ARM64::Q10, ARM64::Q11,
+  ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17,
+  ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23,
+  ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29,
+  ARM64::Q30, ARM64::Q31
+};
+
+static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = FPR128DecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                 uint64_t Addr,
+                                                 const void *Decoder) {
+  if (RegNo > 15)
+    return Fail;
+  return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
+}
+
+static const unsigned FPR64DecoderTable[] = {
+  ARM64::D0,  ARM64::D1,  ARM64::D2,  ARM64::D3,  ARM64::D4,  ARM64::D5,
+  ARM64::D6,  ARM64::D7,  ARM64::D8,  ARM64::D9,  ARM64::D10, ARM64::D11,
+  ARM64::D12, ARM64::D13, ARM64::D14, ARM64::D15, ARM64::D16, ARM64::D17,
+  ARM64::D18, ARM64::D19, ARM64::D20, ARM64::D21, ARM64::D22, ARM64::D23,
+  ARM64::D24, ARM64::D25, ARM64::D26, ARM64::D27, ARM64::D28, ARM64::D29,
+  ARM64::D30, ARM64::D31
+};
+
+static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = FPR64DecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned FPR32DecoderTable[] = {
+  ARM64::S0,  ARM64::S1,  ARM64::S2,  ARM64::S3,  ARM64::S4,  ARM64::S5,
+  ARM64::S6,  ARM64::S7,  ARM64::S8,  ARM64::S9,  ARM64::S10, ARM64::S11,
+  ARM64::S12, ARM64::S13, ARM64::S14, ARM64::S15, ARM64::S16, ARM64::S17,
+  ARM64::S18, ARM64::S19, ARM64::S20, ARM64::S21, ARM64::S22, ARM64::S23,
+  ARM64::S24, ARM64::S25, ARM64::S26, ARM64::S27, ARM64::S28, ARM64::S29,
+  ARM64::S30, ARM64::S31
+};
+
+static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = FPR32DecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned FPR16DecoderTable[] = {
+  ARM64::H0,  ARM64::H1,  ARM64::H2,  ARM64::H3,  ARM64::H4,  ARM64::H5,
+  ARM64::H6,  ARM64::H7,  ARM64::H8,  ARM64::H9,  ARM64::H10, ARM64::H11,
+  ARM64::H12, ARM64::H13, ARM64::H14, ARM64::H15, ARM64::H16, ARM64::H17,
+  ARM64::H18, ARM64::H19, ARM64::H20, ARM64::H21, ARM64::H22, ARM64::H23,
+  ARM64::H24, ARM64::H25, ARM64::H26, ARM64::H27, ARM64::H28, ARM64::H29,
+  ARM64::H30, ARM64::H31
+};
+
+static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = FPR16DecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned FPR8DecoderTable[] = {
+  ARM64::B0,  ARM64::B1,  ARM64::B2,  ARM64::B3,  ARM64::B4,  ARM64::B5,
+  ARM64::B6,  ARM64::B7,  ARM64::B8,  ARM64::B9,  ARM64::B10, ARM64::B11,
+  ARM64::B12, ARM64::B13, ARM64::B14, ARM64::B15, ARM64::B16, ARM64::B17,
+  ARM64::B18, ARM64::B19, ARM64::B20, ARM64::B21, ARM64::B22, ARM64::B23,
+  ARM64::B24, ARM64::B25, ARM64::B26, ARM64::B27, ARM64::B28, ARM64::B29,
+  ARM64::B30, ARM64::B31
+};
+
+static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
+                                            uint64_t Addr,
+                                            const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = FPR8DecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned GPR64DecoderTable[] = {
+  ARM64::X0,  ARM64::X1,  ARM64::X2,  ARM64::X3,  ARM64::X4,  ARM64::X5,
+  ARM64::X6,  ARM64::X7,  ARM64::X8,  ARM64::X9,  ARM64::X10, ARM64::X11,
+  ARM64::X12, ARM64::X13, ARM64::X14, ARM64::X15, ARM64::X16, ARM64::X17,
+  ARM64::X18, ARM64::X19, ARM64::X20, ARM64::X21, ARM64::X22, ARM64::X23,
+  ARM64::X24, ARM64::X25, ARM64::X26, ARM64::X27, ARM64::X28, ARM64::FP,
+  ARM64::LR,  ARM64::XZR
+};
+
+static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = GPR64DecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Addr,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = GPR64DecoderTable[RegNo];
+  if (Register == ARM64::XZR)
+    Register = ARM64::SP;
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned GPR32DecoderTable[] = {
+  ARM64::W0,  ARM64::W1,  ARM64::W2,  ARM64::W3,  ARM64::W4,  ARM64::W5,
+  ARM64::W6,  ARM64::W7,  ARM64::W8,  ARM64::W9,  ARM64::W10, ARM64::W11,
+  ARM64::W12, ARM64::W13, ARM64::W14, ARM64::W15, ARM64::W16, ARM64::W17,
+  ARM64::W18, ARM64::W19, ARM64::W20, ARM64::W21, ARM64::W22, ARM64::W23,
+  ARM64::W24, ARM64::W25, ARM64::W26, ARM64::W27, ARM64::W28, ARM64::W29,
+  ARM64::W30, ARM64::WZR
+};
+
+static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = GPR32DecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Addr,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = GPR32DecoderTable[RegNo];
+  if (Register == ARM64::WZR)
+    Register = ARM64::WSP;
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned VectorDecoderTable[] = {
+  ARM64::Q0,  ARM64::Q1,  ARM64::Q2,  ARM64::Q3,  ARM64::Q4,  ARM64::Q5,
+  ARM64::Q6,  ARM64::Q7,  ARM64::Q8,  ARM64::Q9,  ARM64::Q10, ARM64::Q11,
+  ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17,
+  ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23,
+  ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29,
+  ARM64::Q30, ARM64::Q31
+};
+
+static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+
+  unsigned Register = VectorDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned QQDecoderTable[] = {
+  ARM64::Q0_Q1,   ARM64::Q1_Q2,   ARM64::Q2_Q3,   ARM64::Q3_Q4,
+  ARM64::Q4_Q5,   ARM64::Q5_Q6,   ARM64::Q6_Q7,   ARM64::Q7_Q8,
+  ARM64::Q8_Q9,   ARM64::Q9_Q10,  ARM64::Q10_Q11, ARM64::Q11_Q12,
+  ARM64::Q12_Q13, ARM64::Q13_Q14, ARM64::Q14_Q15, ARM64::Q15_Q16,
+  ARM64::Q16_Q17, ARM64::Q17_Q18, ARM64::Q18_Q19, ARM64::Q19_Q20,
+  ARM64::Q20_Q21, ARM64::Q21_Q22, ARM64::Q22_Q23, ARM64::Q23_Q24,
+  ARM64::Q24_Q25, ARM64::Q25_Q26, ARM64::Q26_Q27, ARM64::Q27_Q28,
+  ARM64::Q28_Q29, ARM64::Q29_Q30, ARM64::Q30_Q31, ARM64::Q31_Q0
+};
+
+static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
+                                          uint64_t Addr, const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = QQDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned QQQDecoderTable[] = {
+  ARM64::Q0_Q1_Q2,    ARM64::Q1_Q2_Q3,    ARM64::Q2_Q3_Q4,
+  ARM64::Q3_Q4_Q5,    ARM64::Q4_Q5_Q6,    ARM64::Q5_Q6_Q7,
+  ARM64::Q6_Q7_Q8,    ARM64::Q7_Q8_Q9,    ARM64::Q8_Q9_Q10,
+  ARM64::Q9_Q10_Q11,  ARM64::Q10_Q11_Q12, ARM64::Q11_Q12_Q13,
+  ARM64::Q12_Q13_Q14, ARM64::Q13_Q14_Q15, ARM64::Q14_Q15_Q16,
+  ARM64::Q15_Q16_Q17, ARM64::Q16_Q17_Q18, ARM64::Q17_Q18_Q19,
+  ARM64::Q18_Q19_Q20, ARM64::Q19_Q20_Q21, ARM64::Q20_Q21_Q22,
+  ARM64::Q21_Q22_Q23, ARM64::Q22_Q23_Q24, ARM64::Q23_Q24_Q25,
+  ARM64::Q24_Q25_Q26, ARM64::Q25_Q26_Q27, ARM64::Q26_Q27_Q28,
+  ARM64::Q27_Q28_Q29, ARM64::Q28_Q29_Q30, ARM64::Q29_Q30_Q31,
+  ARM64::Q30_Q31_Q0,  ARM64::Q31_Q0_Q1
+};
+
+static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
+                                           uint64_t Addr, const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = QQQDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned QQQQDecoderTable[] = {
+  ARM64::Q0_Q1_Q2_Q3,     ARM64::Q1_Q2_Q3_Q4,     ARM64::Q2_Q3_Q4_Q5,
+  ARM64::Q3_Q4_Q5_Q6,     ARM64::Q4_Q5_Q6_Q7,     ARM64::Q5_Q6_Q7_Q8,
+  ARM64::Q6_Q7_Q8_Q9,     ARM64::Q7_Q8_Q9_Q10,    ARM64::Q8_Q9_Q10_Q11,
+  ARM64::Q9_Q10_Q11_Q12,  ARM64::Q10_Q11_Q12_Q13, ARM64::Q11_Q12_Q13_Q14,
+  ARM64::Q12_Q13_Q14_Q15, ARM64::Q13_Q14_Q15_Q16, ARM64::Q14_Q15_Q16_Q17,
+  ARM64::Q15_Q16_Q17_Q18, ARM64::Q16_Q17_Q18_Q19, ARM64::Q17_Q18_Q19_Q20,
+  ARM64::Q18_Q19_Q20_Q21, ARM64::Q19_Q20_Q21_Q22, ARM64::Q20_Q21_Q22_Q23,
+  ARM64::Q21_Q22_Q23_Q24, ARM64::Q22_Q23_Q24_Q25, ARM64::Q23_Q24_Q25_Q26,
+  ARM64::Q24_Q25_Q26_Q27, ARM64::Q25_Q26_Q27_Q28, ARM64::Q26_Q27_Q28_Q29,
+  ARM64::Q27_Q28_Q29_Q30, ARM64::Q28_Q29_Q30_Q31, ARM64::Q29_Q30_Q31_Q0,
+  ARM64::Q30_Q31_Q0_Q1,   ARM64::Q31_Q0_Q1_Q2
+};
+
+static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
+                                            uint64_t Addr,
+                                            const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = QQQQDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned DDDecoderTable[] = {
+  ARM64::D0_D1,   ARM64::D1_D2,   ARM64::D2_D3,   ARM64::D3_D4,
+  ARM64::D4_D5,   ARM64::D5_D6,   ARM64::D6_D7,   ARM64::D7_D8,
+  ARM64::D8_D9,   ARM64::D9_D10,  ARM64::D10_D11, ARM64::D11_D12,
+  ARM64::D12_D13, ARM64::D13_D14, ARM64::D14_D15, ARM64::D15_D16,
+  ARM64::D16_D17, ARM64::D17_D18, ARM64::D18_D19, ARM64::D19_D20,
+  ARM64::D20_D21, ARM64::D21_D22, ARM64::D22_D23, ARM64::D23_D24,
+  ARM64::D24_D25, ARM64::D25_D26, ARM64::D26_D27, ARM64::D27_D28,
+  ARM64::D28_D29, ARM64::D29_D30, ARM64::D30_D31, ARM64::D31_D0
+};
+
+static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
+                                          uint64_t Addr, const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = DDDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned DDDDecoderTable[] = {
+  ARM64::D0_D1_D2,    ARM64::D1_D2_D3,    ARM64::D2_D3_D4,
+  ARM64::D3_D4_D5,    ARM64::D4_D5_D6,    ARM64::D5_D6_D7,
+  ARM64::D6_D7_D8,    ARM64::D7_D8_D9,    ARM64::D8_D9_D10,
+  ARM64::D9_D10_D11,  ARM64::D10_D11_D12, ARM64::D11_D12_D13,
+  ARM64::D12_D13_D14, ARM64::D13_D14_D15, ARM64::D14_D15_D16,
+  ARM64::D15_D16_D17, ARM64::D16_D17_D18, ARM64::D17_D18_D19,
+  ARM64::D18_D19_D20, ARM64::D19_D20_D21, ARM64::D20_D21_D22,
+  ARM64::D21_D22_D23, ARM64::D22_D23_D24, ARM64::D23_D24_D25,
+  ARM64::D24_D25_D26, ARM64::D25_D26_D27, ARM64::D26_D27_D28,
+  ARM64::D27_D28_D29, ARM64::D28_D29_D30, ARM64::D29_D30_D31,
+  ARM64::D30_D31_D0,  ARM64::D31_D0_D1
+};
+
+static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
+                                           uint64_t Addr, const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = DDDDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static const unsigned DDDDDecoderTable[] = {
+  ARM64::D0_D1_D2_D3,     ARM64::D1_D2_D3_D4,     ARM64::D2_D3_D4_D5,
+  ARM64::D3_D4_D5_D6,     ARM64::D4_D5_D6_D7,     ARM64::D5_D6_D7_D8,
+  ARM64::D6_D7_D8_D9,     ARM64::D7_D8_D9_D10,    ARM64::D8_D9_D10_D11,
+  ARM64::D9_D10_D11_D12,  ARM64::D10_D11_D12_D13, ARM64::D11_D12_D13_D14,
+  ARM64::D12_D13_D14_D15, ARM64::D13_D14_D15_D16, ARM64::D14_D15_D16_D17,
+  ARM64::D15_D16_D17_D18, ARM64::D16_D17_D18_D19, ARM64::D17_D18_D19_D20,
+  ARM64::D18_D19_D20_D21, ARM64::D19_D20_D21_D22, ARM64::D20_D21_D22_D23,
+  ARM64::D21_D22_D23_D24, ARM64::D22_D23_D24_D25, ARM64::D23_D24_D25_D26,
+  ARM64::D24_D25_D26_D27, ARM64::D25_D26_D27_D28, ARM64::D26_D27_D28_D29,
+  ARM64::D27_D28_D29_D30, ARM64::D28_D29_D30_D31, ARM64::D29_D30_D31_D0,
+  ARM64::D30_D31_D0_D1,   ARM64::D31_D0_D1_D2
+};
+
+static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
+                                            uint64_t Addr,
+                                            const void *Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = DDDDDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return Success;
+}
+
+static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  Inst.addOperand(MCOperand::CreateImm(64 - Imm));
+  return Success;
+}
+
+static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm,
+                                           uint64_t Addr, const void *Decoder) {
+  int64_t ImmVal = Imm;
+  const ARM64Disassembler *Dis =
+      static_cast<const ARM64Disassembler *>(Decoder);
+
+  // Sign-extend 19-bit immediate.
+  if (ImmVal & (1 << (19 - 1)))
+    ImmVal |= ~((1LL << 19) - 1);
+
+  if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2,
+                                     Inst.getOpcode() != ARM64::LDRXl, 4, Inst))
+    Inst.addOperand(MCOperand::CreateImm(ImmVal));
+  return Success;
+}
+
+static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Address,
+                                         const void *Decoder) {
+  Inst.addOperand(MCOperand::CreateImm(Imm | 0x8000));
+  return Success;
+}
+
+static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm,
+                                       unsigned Add) {
+  Inst.addOperand(MCOperand::CreateImm(Add - Imm));
+  return Success;
+}
+
+static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm,
+                                       unsigned Add) {
+  Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1)));
+  return Success;
+}
+
+static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftRImm(Inst, Imm, 64);
+}
+
+static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr,
+                                               const void *Decoder) {
+  return DecodeVecShiftRImm(Inst, Imm | 0x20, 64);
+}
+
+static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftRImm(Inst, Imm, 32);
+}
+
+static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr,
+                                               const void *Decoder) {
+  return DecodeVecShiftRImm(Inst, Imm | 0x10, 32);
+}
+
+static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftRImm(Inst, Imm, 16);
+}
+
+static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr,
+                                               const void *Decoder) {
+  return DecodeVecShiftRImm(Inst, Imm | 0x8, 16);
+}
+
+static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
+                                        uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftRImm(Inst, Imm, 8);
+}
+
+static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftLImm(Inst, Imm, 64);
+}
+
+static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftLImm(Inst, Imm, 32);
+}
+
+static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
+                                         uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftLImm(Inst, Imm, 16);
+}
+
+static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
+                                        uint64_t Addr, const void *Decoder) {
+  return DecodeVecShiftLImm(Inst, Imm, 8);
+}
+
+static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
+                                                   uint32_t insn, uint64_t Addr,
+                                                   const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Rm = fieldFromInstruction(insn, 16, 5);
+  unsigned shiftHi = fieldFromInstruction(insn, 22, 2);
+  unsigned shiftLo = fieldFromInstruction(insn, 10, 6);
+  unsigned shift = (shiftHi << 6) | shiftLo;
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::ANDWrs:
+  case ARM64::ANDSWrs:
+  case ARM64::BICWrs:
+  case ARM64::BICSWrs:
+  case ARM64::ORRWrs:
+  case ARM64::ORNWrs:
+  case ARM64::EORWrs:
+  case ARM64::EONWrs:
+  case ARM64::ADDWrs:
+  case ARM64::ADDSWrs:
+  case ARM64::SUBWrs:
+  case ARM64::SUBSWrs: {
+    DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  }
+  case ARM64::ANDXrs:
+  case ARM64::ANDSXrs:
+  case ARM64::BICXrs:
+  case ARM64::BICSXrs:
+  case ARM64::ORRXrs:
+  case ARM64::ORNXrs:
+  case ARM64::EORXrs:
+  case ARM64::EONXrs:
+  case ARM64::ADDXrs:
+  case ARM64::ADDSXrs:
+  case ARM64::SUBXrs:
+  case ARM64::SUBSXrs:
+    DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
+    DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  }
+
+  Inst.addOperand(MCOperand::CreateImm(shift));
+  return Success;
+}
+
+static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned imm = fieldFromInstruction(insn, 5, 16);
+  unsigned shift = fieldFromInstruction(insn, 21, 2);
+  shift <<= 4;
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::MOVZWi:
+  case ARM64::MOVNWi:
+  case ARM64::MOVKWi:
+    DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::MOVZXi:
+  case ARM64::MOVNXi:
+  case ARM64::MOVKXi:
+    DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  }
+
+  if (Inst.getOpcode() == ARM64::MOVKWi || Inst.getOpcode() == ARM64::MOVKXi)
+    Inst.addOperand(Inst.getOperand(0));
+
+  Inst.addOperand(MCOperand::CreateImm(imm));
+  Inst.addOperand(MCOperand::CreateImm(shift));
+  return Success;
+}
+
+static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
+                                                  uint32_t insn, uint64_t Addr,
+                                                  const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned offset = fieldFromInstruction(insn, 10, 12);
+  const ARM64Disassembler *Dis =
+      static_cast<const ARM64Disassembler *>(Decoder);
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::PRFMui:
+    // Rt is an immediate in prefetch.
+    Inst.addOperand(MCOperand::CreateImm(Rt));
+    break;
+  case ARM64::STRBBui:
+  case ARM64::LDRBBui:
+  case ARM64::LDRSBWui:
+  case ARM64::STRHHui:
+  case ARM64::LDRHHui:
+  case ARM64::LDRSHWui:
+  case ARM64::STRWui:
+  case ARM64::LDRWui:
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRSBXui:
+  case ARM64::LDRSHXui:
+  case ARM64::LDRSWui:
+  case ARM64::STRXui:
+  case ARM64::LDRXui:
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRQui:
+  case ARM64::STRQui:
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRDui:
+  case ARM64::STRDui:
+    DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRSui:
+  case ARM64::STRSui:
+    DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRHui:
+  case ARM64::STRHui:
+    DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRBui:
+  case ARM64::STRBui:
+    DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  }
+
+  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn))
+    Inst.addOperand(MCOperand::CreateImm(offset));
+  return Success;
+}
+
+static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Addr,
+                                                const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  int64_t offset = fieldFromInstruction(insn, 12, 9);
+
+  // offset is a 9-bit signed immediate, so sign extend it to
+  // fill the unsigned.
+  if (offset & (1 << (9 - 1)))
+    offset |= ~((1LL << 9) - 1);
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::PRFUMi:
+    // Rt is an immediate in prefetch.
+    Inst.addOperand(MCOperand::CreateImm(Rt));
+    break;
+  case ARM64::STURBBi:
+  case ARM64::LDURBBi:
+  case ARM64::LDURSBWi:
+  case ARM64::STURHHi:
+  case ARM64::LDURHHi:
+  case ARM64::LDURSHWi:
+  case ARM64::STURWi:
+  case ARM64::LDURWi:
+  case ARM64::LDTRSBWi:
+  case ARM64::LDTRSHWi:
+  case ARM64::STTRWi:
+  case ARM64::LDTRWi:
+  case ARM64::STTRHi:
+  case ARM64::LDTRHi:
+  case ARM64::LDTRBi:
+  case ARM64::STTRBi:
+  case ARM64::LDRSBWpre:
+  case ARM64::LDRSHWpre:
+  case ARM64::STRBBpre:
+  case ARM64::LDRBBpre:
+  case ARM64::STRHHpre:
+  case ARM64::LDRHHpre:
+  case ARM64::STRWpre:
+  case ARM64::LDRWpre:
+  case ARM64::LDRSBWpost:
+  case ARM64::LDRSHWpost:
+  case ARM64::STRBBpost:
+  case ARM64::LDRBBpost:
+  case ARM64::STRHHpost:
+  case ARM64::LDRHHpost:
+  case ARM64::STRWpost:
+  case ARM64::LDRWpost:
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDURSBXi:
+  case ARM64::LDURSHXi:
+  case ARM64::LDURSWi:
+  case ARM64::STURXi:
+  case ARM64::LDURXi:
+  case ARM64::LDTRSBXi:
+  case ARM64::LDTRSHXi:
+  case ARM64::LDTRSWi:
+  case ARM64::STTRXi:
+  case ARM64::LDTRXi:
+  case ARM64::LDRSBXpre:
+  case ARM64::LDRSHXpre:
+  case ARM64::STRXpre:
+  case ARM64::LDRSWpre:
+  case ARM64::LDRXpre:
+  case ARM64::LDRSBXpost:
+  case ARM64::LDRSHXpost:
+  case ARM64::STRXpost:
+  case ARM64::LDRSWpost:
+  case ARM64::LDRXpost:
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDURQi:
+  case ARM64::STURQi:
+  case ARM64::LDRQpre:
+  case ARM64::STRQpre:
+  case ARM64::LDRQpost:
+  case ARM64::STRQpost:
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDURDi:
+  case ARM64::STURDi:
+  case ARM64::LDRDpre:
+  case ARM64::STRDpre:
+  case ARM64::LDRDpost:
+  case ARM64::STRDpost:
+    DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDURSi:
+  case ARM64::STURSi:
+  case ARM64::LDRSpre:
+  case ARM64::STRSpre:
+  case ARM64::LDRSpost:
+  case ARM64::STRSpost:
+    DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDURHi:
+  case ARM64::STURHi:
+  case ARM64::LDRHpre:
+  case ARM64::STRHpre:
+  case ARM64::LDRHpost:
+  case ARM64::STRHpost:
+    DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDURBi:
+  case ARM64::STURBi:
+  case ARM64::LDRBpre:
+  case ARM64::STRBpre:
+  case ARM64::LDRBpost:
+  case ARM64::STRBpost:
+    DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  }
+
+  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  Inst.addOperand(MCOperand::CreateImm(offset));
+  return Success;
+}
+
+static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
+                                                   uint32_t insn, uint64_t Addr,
+                                                   const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
+  unsigned Rs = fieldFromInstruction(insn, 16, 5);
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::STLXRW:
+  case ARM64::STLXRB:
+  case ARM64::STLXRH:
+  case ARM64::STXRW:
+  case ARM64::STXRB:
+  case ARM64::STXRH:
+    DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+  // FALLTHROUGH
+  case ARM64::LDARW:
+  case ARM64::LDARB:
+  case ARM64::LDARH:
+  case ARM64::LDAXRW:
+  case ARM64::LDAXRB:
+  case ARM64::LDAXRH:
+  case ARM64::LDXRW:
+  case ARM64::LDXRB:
+  case ARM64::LDXRH:
+  case ARM64::STLRW:
+  case ARM64::STLRB:
+  case ARM64::STLRH:
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::STLXRX:
+  case ARM64::STXRX:
+    DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+  // FALLTHROUGH
+  case ARM64::LDARX:
+  case ARM64::LDAXRX:
+  case ARM64::LDXRX:
+  case ARM64::STLRX:
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::STLXPW:
+  case ARM64::STXPW:
+    DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+  // FALLTHROUGH
+  case ARM64::LDAXPW:
+  case ARM64::LDXPW:
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder);
+    break;
+  case ARM64::STLXPX:
+  case ARM64::STXPX:
+    DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+  // FALLTHROUGH
+  case ARM64::LDAXPX:
+  case ARM64::LDXPX:
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
+    break;
+  }
+
+  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  return Success;
+}
+
+static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
+  int64_t offset = fieldFromInstruction(insn, 15, 7);
+
+  // offset is a 7-bit signed immediate, so sign extend it to
+  // fill the unsigned.
+  if (offset & (1 << (7 - 1)))
+    offset |= ~((1LL << 7) - 1);
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::LDNPXi:
+  case ARM64::STNPXi:
+  case ARM64::LDPXpost:
+  case ARM64::STPXpost:
+  case ARM64::LDPSWpost:
+  case ARM64::LDPXi:
+  case ARM64::STPXi:
+  case ARM64::LDPSWi:
+  case ARM64::LDPXpre:
+  case ARM64::STPXpre:
+  case ARM64::LDPSWpre:
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
+    break;
+  case ARM64::LDNPWi:
+  case ARM64::STNPWi:
+  case ARM64::LDPWpost:
+  case ARM64::STPWpost:
+  case ARM64::LDPWi:
+  case ARM64::STPWi:
+  case ARM64::LDPWpre:
+  case ARM64::STPWpre:
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder);
+    break;
+  case ARM64::LDNPQi:
+  case ARM64::STNPQi:
+  case ARM64::LDPQpost:
+  case ARM64::STPQpost:
+  case ARM64::LDPQi:
+  case ARM64::STPQi:
+  case ARM64::LDPQpre:
+  case ARM64::STPQpre:
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder);
+    break;
+  case ARM64::LDNPDi:
+  case ARM64::STNPDi:
+  case ARM64::LDPDpost:
+  case ARM64::STPDpost:
+  case ARM64::LDPDi:
+  case ARM64::STPDi:
+  case ARM64::LDPDpre:
+  case ARM64::STPDpre:
+    DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder);
+    break;
+  case ARM64::LDNPSi:
+  case ARM64::STNPSi:
+  case ARM64::LDPSpost:
+  case ARM64::STPSpost:
+  case ARM64::LDPSi:
+  case ARM64::STPSi:
+  case ARM64::LDPSpre:
+  case ARM64::STPSpre:
+    DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder);
+    break;
+  }
+
+  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  Inst.addOperand(MCOperand::CreateImm(offset));
+  return Success;
+}
+
+static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst,
+                                                   uint32_t insn, uint64_t Addr,
+                                                   const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Rm = fieldFromInstruction(insn, 16, 5);
+  unsigned extendHi = fieldFromInstruction(insn, 13, 3);
+  unsigned extendLo = fieldFromInstruction(insn, 12, 1);
+  unsigned extend = 0;
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::LDRSWro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRXro:
+  case ARM64::STRXro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRWro:
+  case ARM64::STRWro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRQro:
+  case ARM64::STRQro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRDro:
+  case ARM64::STRDro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRSro:
+  case ARM64::STRSro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRHro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRBro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRBBro:
+  case ARM64::STRBBro:
+  case ARM64::LDRSBWro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRHHro:
+  case ARM64::STRHHro:
+  case ARM64::LDRSHWro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRSHXro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LDRSBXro:
+    extend = (extendHi << 1) | extendLo;
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::PRFMro:
+    extend = (extendHi << 1) | extendLo;
+    Inst.addOperand(MCOperand::CreateImm(Rt));
+  }
+
+  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+
+  if (extendHi == 0x3)
+    DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+  else
+    DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+
+  Inst.addOperand(MCOperand::CreateImm(extend));
+  return Success;
+}
+
+static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Addr,
+                                                const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Rm = fieldFromInstruction(insn, 16, 5);
+  unsigned extend = fieldFromInstruction(insn, 10, 6);
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::ADDWrx:
+  case ARM64::SUBWrx:
+    DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  case ARM64::ADDSWrx:
+  case ARM64::SUBSWrx:
+    DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  case ARM64::ADDXrx:
+  case ARM64::SUBXrx:
+    DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  case ARM64::ADDSXrx:
+  case ARM64::SUBSXrx:
+    DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  case ARM64::ADDXrx64:
+  case ARM64::ADDSXrx64:
+  case ARM64::SUBXrx64:
+  case ARM64::SUBSXrx64:
+    DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+    DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  }
+
+  Inst.addOperand(MCOperand::CreateImm(extend));
+  return Success;
+}
+
+static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Addr,
+                                                const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Datasize = fieldFromInstruction(insn, 31, 1);
+  unsigned imm;
+
+  if (Datasize) {
+    DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
+    imm = fieldFromInstruction(insn, 10, 13);
+    if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 64))
+      return Fail;
+  } else {
+    DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder);
+    imm = fieldFromInstruction(insn, 10, 12);
+    if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 32))
+      return Fail;
+  }
+  Inst.addOperand(MCOperand::CreateImm(imm));
+  return Success;
+}
+
+static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                            uint64_t Addr,
+                                            const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned cmode = fieldFromInstruction(insn, 12, 4);
+  unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
+  imm |= fieldFromInstruction(insn, 5, 5);
+
+  if (Inst.getOpcode() == ARM64::MOVID)
+    DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder);
+  else
+    DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+
+  Inst.addOperand(MCOperand::CreateImm(imm));
+
+  switch (Inst.getOpcode()) {
+  default:
+    break;
+  case ARM64::MOVIv4i16:
+  case ARM64::MOVIv8i16:
+  case ARM64::MVNIv4i16:
+  case ARM64::MVNIv8i16:
+  case ARM64::MOVIv2i32:
+  case ARM64::MOVIv4i32:
+  case ARM64::MVNIv2i32:
+  case ARM64::MVNIv4i32:
+    Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+    break;
+  case ARM64::MOVIv2s_msl:
+  case ARM64::MOVIv4s_msl:
+  case ARM64::MVNIv2s_msl:
+  case ARM64::MVNIv4s_msl:
+    Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108));
+    break;
+  }
+
+  return Success;
+}
+
+static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Addr,
+                                                const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned cmode = fieldFromInstruction(insn, 12, 4);
+  unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
+  imm |= fieldFromInstruction(insn, 5, 5);
+
+  // Tied operands added twice.
+  DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+  DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+
+  Inst.addOperand(MCOperand::CreateImm(imm));
+  Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+
+  return Success;
+}
+
+static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                         uint64_t Addr, const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  int64_t imm = fieldFromInstruction(insn, 5, 19) << 2;
+  imm |= fieldFromInstruction(insn, 29, 2);
+  const ARM64Disassembler *Dis =
+      static_cast<const ARM64Disassembler *>(Decoder);
+
+  // Sign-extend the 21-bit immediate.
+  if (imm & (1 << (21 - 1)))
+    imm |= ~((1LL << 21) - 1);
+
+  DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+  if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn))
+    Inst.addOperand(MCOperand::CreateImm(imm));
+
+  return Success;
+}
+
+static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+                                        uint64_t Addr, const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Imm = fieldFromInstruction(insn, 10, 14);
+  unsigned S = fieldFromInstruction(insn, 29, 1);
+  unsigned Datasize = fieldFromInstruction(insn, 31, 1);
+
+  unsigned ShifterVal = (Imm >> 12) & 3;
+  unsigned ImmVal = Imm & 0xFFF;
+  const ARM64Disassembler *Dis =
+      static_cast<const ARM64Disassembler *>(Decoder);
+
+  if (ShifterVal != 0 && ShifterVal != 1)
+    return Fail;
+
+  if (Datasize) {
+    if (Rd == 31 && !S)
+      DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+    else
+      DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  } else {
+    if (Rd == 31 && !S)
+      DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
+    else
+      DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
+  }
+
+  if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn))
+    Inst.addOperand(MCOperand::CreateImm(ImmVal));
+  Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
+  return Success;
+}
+
+static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  int64_t imm = fieldFromInstruction(insn, 0, 26);
+  const ARM64Disassembler *Dis =
+      static_cast<const ARM64Disassembler *>(Decoder);
+
+  // Sign-extend the 26-bit immediate.
+  if (imm & (1 << (26 - 1)))
+    imm |= ~((1LL << 26) - 1);
+
+  if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, Success, 4, Inst))
+    Inst.addOperand(MCOperand::CreateImm(imm));
+
+  return Success;
+}
+
+static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Addr,
+                                                const void *Decoder) {
+  uint64_t op1 = fieldFromInstruction(insn, 16, 3);
+  uint64_t op2 = fieldFromInstruction(insn, 5, 3);
+  uint64_t crm = fieldFromInstruction(insn, 8, 4);
+
+  Inst.addOperand(MCOperand::CreateImm((op1 << 3) | op2));
+  Inst.addOperand(MCOperand::CreateImm(crm));
+
+  return Success;
+}
+
+static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
+                                        uint64_t Addr, const void *Decoder) {
+  uint64_t Rt = fieldFromInstruction(insn, 0, 5);
+  uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5;
+  bit |= fieldFromInstruction(insn, 19, 5);
+  int64_t dst = fieldFromInstruction(insn, 5, 14);
+  const ARM64Disassembler *Dis =
+      static_cast<const ARM64Disassembler *>(Decoder);
+
+  // Sign-extend 14-bit immediate.
+  if (dst & (1 << (14 - 1)))
+    dst |= ~((1LL << 14) - 1);
+
+  DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+  Inst.addOperand(MCOperand::CreateImm(bit));
+  if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, Success, 4, Inst))
+    Inst.addOperand(MCOperand::CreateImm(dst));
+
+  return Success;
+}
+
+static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn,
+                                       uint64_t Addr, const void *Decoder) {
+  uint64_t Rd = fieldFromInstruction(insn, 0, 5);
+  uint64_t Rn = fieldFromInstruction(insn, 5, 5);
+  uint64_t Rm = fieldFromInstruction(insn, 16, 5);
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::ST1Onev8b_POST:
+  case ARM64::ST1Onev4h_POST:
+  case ARM64::ST1Onev2s_POST:
+  case ARM64::ST1Onev1d_POST:
+  case ARM64::LD1Onev8b_POST:
+  case ARM64::LD1Onev4h_POST:
+  case ARM64::LD1Onev2s_POST:
+  case ARM64::LD1Onev1d_POST:
+    DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::ST1Onev16b_POST:
+  case ARM64::ST1Onev8h_POST:
+  case ARM64::ST1Onev4s_POST:
+  case ARM64::ST1Onev2d_POST:
+  case ARM64::LD1Onev16b_POST:
+  case ARM64::LD1Onev8h_POST:
+  case ARM64::LD1Onev4s_POST:
+  case ARM64::LD1Onev2d_POST:
+    DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::ST1Twov8b_POST:
+  case ARM64::ST1Twov4h_POST:
+  case ARM64::ST1Twov2s_POST:
+  case ARM64::ST1Twov1d_POST:
+  case ARM64::ST2Twov8b_POST:
+  case ARM64::ST2Twov4h_POST:
+  case ARM64::ST2Twov2s_POST:
+  case ARM64::LD1Twov8b_POST:
+  case ARM64::LD1Twov4h_POST:
+  case ARM64::LD1Twov2s_POST:
+  case ARM64::LD1Twov1d_POST:
+  case ARM64::LD2Twov8b_POST:
+  case ARM64::LD2Twov4h_POST:
+  case ARM64::LD2Twov2s_POST:
+    DecodeDDRegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::ST1Threev8b_POST:
+  case ARM64::ST1Threev4h_POST:
+  case ARM64::ST1Threev2s_POST:
+  case ARM64::ST1Threev1d_POST:
+  case ARM64::ST3Threev8b_POST:
+  case ARM64::ST3Threev4h_POST:
+  case ARM64::ST3Threev2s_POST:
+  case ARM64::LD1Threev8b_POST:
+  case ARM64::LD1Threev4h_POST:
+  case ARM64::LD1Threev2s_POST:
+  case ARM64::LD1Threev1d_POST:
+  case ARM64::LD3Threev8b_POST:
+  case ARM64::LD3Threev4h_POST:
+  case ARM64::LD3Threev2s_POST:
+    DecodeDDDRegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::ST1Fourv8b_POST:
+  case ARM64::ST1Fourv4h_POST:
+  case ARM64::ST1Fourv2s_POST:
+  case ARM64::ST1Fourv1d_POST:
+  case ARM64::ST4Fourv8b_POST:
+  case ARM64::ST4Fourv4h_POST:
+  case ARM64::ST4Fourv2s_POST:
+  case ARM64::LD1Fourv8b_POST:
+  case ARM64::LD1Fourv4h_POST:
+  case ARM64::LD1Fourv2s_POST:
+  case ARM64::LD1Fourv1d_POST:
+  case ARM64::LD4Fourv8b_POST:
+  case ARM64::LD4Fourv4h_POST:
+  case ARM64::LD4Fourv2s_POST:
+    DecodeDDDDRegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::ST1Twov16b_POST:
+  case ARM64::ST1Twov8h_POST:
+  case ARM64::ST1Twov4s_POST:
+  case ARM64::ST1Twov2d_POST:
+  case ARM64::ST2Twov16b_POST:
+  case ARM64::ST2Twov8h_POST:
+  case ARM64::ST2Twov4s_POST:
+  case ARM64::ST2Twov2d_POST:
+  case ARM64::LD1Twov16b_POST:
+  case ARM64::LD1Twov8h_POST:
+  case ARM64::LD1Twov4s_POST:
+  case ARM64::LD1Twov2d_POST:
+  case ARM64::LD2Twov16b_POST:
+  case ARM64::LD2Twov8h_POST:
+  case ARM64::LD2Twov4s_POST:
+  case ARM64::LD2Twov2d_POST:
+    DecodeQQRegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::ST1Threev16b_POST:
+  case ARM64::ST1Threev8h_POST:
+  case ARM64::ST1Threev4s_POST:
+  case ARM64::ST1Threev2d_POST:
+  case ARM64::ST3Threev16b_POST:
+  case ARM64::ST3Threev8h_POST:
+  case ARM64::ST3Threev4s_POST:
+  case ARM64::ST3Threev2d_POST:
+  case ARM64::LD1Threev16b_POST:
+  case ARM64::LD1Threev8h_POST:
+  case ARM64::LD1Threev4s_POST:
+  case ARM64::LD1Threev2d_POST:
+  case ARM64::LD3Threev16b_POST:
+  case ARM64::LD3Threev8h_POST:
+  case ARM64::LD3Threev4s_POST:
+  case ARM64::LD3Threev2d_POST:
+    DecodeQQQRegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  case ARM64::ST1Fourv16b_POST:
+  case ARM64::ST1Fourv8h_POST:
+  case ARM64::ST1Fourv4s_POST:
+  case ARM64::ST1Fourv2d_POST:
+  case ARM64::ST4Fourv16b_POST:
+  case ARM64::ST4Fourv8h_POST:
+  case ARM64::ST4Fourv4s_POST:
+  case ARM64::ST4Fourv2d_POST:
+  case ARM64::LD1Fourv16b_POST:
+  case ARM64::LD1Fourv8h_POST:
+  case ARM64::LD1Fourv4s_POST:
+  case ARM64::LD1Fourv2d_POST:
+  case ARM64::LD4Fourv16b_POST:
+  case ARM64::LD4Fourv8h_POST:
+  case ARM64::LD4Fourv4s_POST:
+  case ARM64::LD4Fourv2d_POST:
+    DecodeQQQQRegisterClass(Inst, Rd, Addr, Decoder);
+    break;
+  }
+
+  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+  return Success;
+}
+
+static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn,
+                                         uint64_t Addr, const void *Decoder) {
+  uint64_t Rt = fieldFromInstruction(insn, 0, 5);
+  uint64_t Rn = fieldFromInstruction(insn, 5, 5);
+  uint64_t Rm = fieldFromInstruction(insn, 16, 5);
+  uint64_t size = fieldFromInstruction(insn, 10, 2);
+  uint64_t S = fieldFromInstruction(insn, 12, 1);
+  uint64_t Q = fieldFromInstruction(insn, 30, 1);
+  uint64_t index = 0;
+
+  switch (Inst.getOpcode()) {
+  case ARM64::ST1i8:
+  case ARM64::ST1i8_POST:
+  case ARM64::ST2i8:
+  case ARM64::ST2i8_POST:
+  case ARM64::ST3i8_POST:
+  case ARM64::ST3i8:
+  case ARM64::ST4i8_POST:
+  case ARM64::ST4i8:
+    index = (Q << 3) | (S << 2) | size;
+    break;
+  case ARM64::ST1i16:
+  case ARM64::ST1i16_POST:
+  case ARM64::ST2i16:
+  case ARM64::ST2i16_POST:
+  case ARM64::ST3i16_POST:
+  case ARM64::ST3i16:
+  case ARM64::ST4i16_POST:
+  case ARM64::ST4i16:
+    index = (Q << 2) | (S << 1) | (size >> 1);
+    break;
+  case ARM64::ST1i32:
+  case ARM64::ST1i32_POST:
+  case ARM64::ST2i32:
+  case ARM64::ST2i32_POST:
+  case ARM64::ST3i32_POST:
+  case ARM64::ST3i32:
+  case ARM64::ST4i32_POST:
+  case ARM64::ST4i32:
+    index = (Q << 1) | S;
+    break;
+  case ARM64::ST1i64:
+  case ARM64::ST1i64_POST:
+  case ARM64::ST2i64:
+  case ARM64::ST2i64_POST:
+  case ARM64::ST3i64_POST:
+  case ARM64::ST3i64:
+  case ARM64::ST4i64_POST:
+  case ARM64::ST4i64:
+    index = Q;
+    break;
+  }
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::LD1Rv8b:
+  case ARM64::LD1Rv8b_POST:
+  case ARM64::LD1Rv4h:
+  case ARM64::LD1Rv4h_POST:
+  case ARM64::LD1Rv2s:
+  case ARM64::LD1Rv2s_POST:
+  case ARM64::LD1Rv1d:
+  case ARM64::LD1Rv1d_POST:
+    DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD1Rv16b:
+  case ARM64::LD1Rv16b_POST:
+  case ARM64::LD1Rv8h:
+  case ARM64::LD1Rv8h_POST:
+  case ARM64::LD1Rv4s:
+  case ARM64::LD1Rv4s_POST:
+  case ARM64::LD1Rv2d:
+  case ARM64::LD1Rv2d_POST:
+  case ARM64::ST1i8:
+  case ARM64::ST1i8_POST:
+  case ARM64::ST1i16:
+  case ARM64::ST1i16_POST:
+  case ARM64::ST1i32:
+  case ARM64::ST1i32_POST:
+  case ARM64::ST1i64:
+  case ARM64::ST1i64_POST:
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD2Rv16b:
+  case ARM64::LD2Rv16b_POST:
+  case ARM64::LD2Rv8h:
+  case ARM64::LD2Rv8h_POST:
+  case ARM64::LD2Rv4s:
+  case ARM64::LD2Rv4s_POST:
+  case ARM64::LD2Rv2d:
+  case ARM64::LD2Rv2d_POST:
+  case ARM64::ST2i8:
+  case ARM64::ST2i8_POST:
+  case ARM64::ST2i16:
+  case ARM64::ST2i16_POST:
+  case ARM64::ST2i32:
+  case ARM64::ST2i32_POST:
+  case ARM64::ST2i64:
+  case ARM64::ST2i64_POST:
+    DecodeQQRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD2Rv8b:
+  case ARM64::LD2Rv8b_POST:
+  case ARM64::LD2Rv4h:
+  case ARM64::LD2Rv4h_POST:
+  case ARM64::LD2Rv2s:
+  case ARM64::LD2Rv2s_POST:
+  case ARM64::LD2Rv1d:
+  case ARM64::LD2Rv1d_POST:
+    DecodeDDRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD3Rv8b:
+  case ARM64::LD3Rv8b_POST:
+  case ARM64::LD3Rv4h:
+  case ARM64::LD3Rv4h_POST:
+  case ARM64::LD3Rv2s:
+  case ARM64::LD3Rv2s_POST:
+  case ARM64::LD3Rv1d:
+  case ARM64::LD3Rv1d_POST:
+    DecodeDDDRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD3Rv16b:
+  case ARM64::LD3Rv16b_POST:
+  case ARM64::LD3Rv8h:
+  case ARM64::LD3Rv8h_POST:
+  case ARM64::LD3Rv4s:
+  case ARM64::LD3Rv4s_POST:
+  case ARM64::LD3Rv2d:
+  case ARM64::LD3Rv2d_POST:
+  case ARM64::ST3i8:
+  case ARM64::ST3i8_POST:
+  case ARM64::ST3i16:
+  case ARM64::ST3i16_POST:
+  case ARM64::ST3i32:
+  case ARM64::ST3i32_POST:
+  case ARM64::ST3i64:
+  case ARM64::ST3i64_POST:
+    DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD4Rv8b:
+  case ARM64::LD4Rv8b_POST:
+  case ARM64::LD4Rv4h:
+  case ARM64::LD4Rv4h_POST:
+  case ARM64::LD4Rv2s:
+  case ARM64::LD4Rv2s_POST:
+  case ARM64::LD4Rv1d:
+  case ARM64::LD4Rv1d_POST:
+    DecodeDDDDRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD4Rv16b:
+  case ARM64::LD4Rv16b_POST:
+  case ARM64::LD4Rv8h:
+  case ARM64::LD4Rv8h_POST:
+  case ARM64::LD4Rv4s:
+  case ARM64::LD4Rv4s_POST:
+  case ARM64::LD4Rv2d:
+  case ARM64::LD4Rv2d_POST:
+  case ARM64::ST4i8:
+  case ARM64::ST4i8_POST:
+  case ARM64::ST4i16:
+  case ARM64::ST4i16_POST:
+  case ARM64::ST4i32:
+  case ARM64::ST4i32_POST:
+  case ARM64::ST4i64:
+  case ARM64::ST4i64_POST:
+    DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  }
+
+  switch (Inst.getOpcode()) {
+  case ARM64::LD1Rv8b:
+  case ARM64::LD1Rv8b_POST:
+  case ARM64::LD1Rv16b:
+  case ARM64::LD1Rv16b_POST:
+  case ARM64::LD1Rv4h:
+  case ARM64::LD1Rv4h_POST:
+  case ARM64::LD1Rv8h:
+  case ARM64::LD1Rv8h_POST:
+  case ARM64::LD1Rv4s:
+  case ARM64::LD1Rv4s_POST:
+  case ARM64::LD1Rv2s:
+  case ARM64::LD1Rv2s_POST:
+  case ARM64::LD1Rv1d:
+  case ARM64::LD1Rv1d_POST:
+  case ARM64::LD1Rv2d:
+  case ARM64::LD1Rv2d_POST:
+  case ARM64::LD2Rv8b:
+  case ARM64::LD2Rv8b_POST:
+  case ARM64::LD2Rv16b:
+  case ARM64::LD2Rv16b_POST:
+  case ARM64::LD2Rv4h:
+  case ARM64::LD2Rv4h_POST:
+  case ARM64::LD2Rv8h:
+  case ARM64::LD2Rv8h_POST:
+  case ARM64::LD2Rv2s:
+  case ARM64::LD2Rv2s_POST:
+  case ARM64::LD2Rv4s:
+  case ARM64::LD2Rv4s_POST:
+  case ARM64::LD2Rv2d:
+  case ARM64::LD2Rv2d_POST:
+  case ARM64::LD2Rv1d:
+  case ARM64::LD2Rv1d_POST:
+  case ARM64::LD3Rv8b:
+  case ARM64::LD3Rv8b_POST:
+  case ARM64::LD3Rv16b:
+  case ARM64::LD3Rv16b_POST:
+  case ARM64::LD3Rv4h:
+  case ARM64::LD3Rv4h_POST:
+  case ARM64::LD3Rv8h:
+  case ARM64::LD3Rv8h_POST:
+  case ARM64::LD3Rv2s:
+  case ARM64::LD3Rv2s_POST:
+  case ARM64::LD3Rv4s:
+  case ARM64::LD3Rv4s_POST:
+  case ARM64::LD3Rv2d:
+  case ARM64::LD3Rv2d_POST:
+  case ARM64::LD3Rv1d:
+  case ARM64::LD3Rv1d_POST:
+  case ARM64::LD4Rv8b:
+  case ARM64::LD4Rv8b_POST:
+  case ARM64::LD4Rv16b:
+  case ARM64::LD4Rv16b_POST:
+  case ARM64::LD4Rv4h:
+  case ARM64::LD4Rv4h_POST:
+  case ARM64::LD4Rv8h:
+  case ARM64::LD4Rv8h_POST:
+  case ARM64::LD4Rv2s:
+  case ARM64::LD4Rv2s_POST:
+  case ARM64::LD4Rv4s:
+  case ARM64::LD4Rv4s_POST:
+  case ARM64::LD4Rv2d:
+  case ARM64::LD4Rv2d_POST:
+  case ARM64::LD4Rv1d:
+  case ARM64::LD4Rv1d_POST:
+    break;
+  default:
+    Inst.addOperand(MCOperand::CreateImm(index));
+  }
+
+  DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
+
+  switch (Inst.getOpcode()) {
+  case ARM64::ST1i8_POST:
+  case ARM64::ST1i16_POST:
+  case ARM64::ST1i32_POST:
+  case ARM64::ST1i64_POST:
+  case ARM64::LD1Rv8b_POST:
+  case ARM64::LD1Rv16b_POST:
+  case ARM64::LD1Rv4h_POST:
+  case ARM64::LD1Rv8h_POST:
+  case ARM64::LD1Rv2s_POST:
+  case ARM64::LD1Rv4s_POST:
+  case ARM64::LD1Rv1d_POST:
+  case ARM64::LD1Rv2d_POST:
+  case ARM64::ST2i8_POST:
+  case ARM64::ST2i16_POST:
+  case ARM64::ST2i32_POST:
+  case ARM64::ST2i64_POST:
+  case ARM64::LD2Rv8b_POST:
+  case ARM64::LD2Rv16b_POST:
+  case ARM64::LD2Rv4h_POST:
+  case ARM64::LD2Rv8h_POST:
+  case ARM64::LD2Rv2s_POST:
+  case ARM64::LD2Rv4s_POST:
+  case ARM64::LD2Rv2d_POST:
+  case ARM64::LD2Rv1d_POST:
+  case ARM64::ST3i8_POST:
+  case ARM64::ST3i16_POST:
+  case ARM64::ST3i32_POST:
+  case ARM64::ST3i64_POST:
+  case ARM64::LD3Rv8b_POST:
+  case ARM64::LD3Rv16b_POST:
+  case ARM64::LD3Rv4h_POST:
+  case ARM64::LD3Rv8h_POST:
+  case ARM64::LD3Rv2s_POST:
+  case ARM64::LD3Rv4s_POST:
+  case ARM64::LD3Rv2d_POST:
+  case ARM64::LD3Rv1d_POST:
+  case ARM64::ST4i8_POST:
+  case ARM64::ST4i16_POST:
+  case ARM64::ST4i32_POST:
+  case ARM64::ST4i64_POST:
+  case ARM64::LD4Rv8b_POST:
+  case ARM64::LD4Rv16b_POST:
+  case ARM64::LD4Rv4h_POST:
+  case ARM64::LD4Rv8h_POST:
+  case ARM64::LD4Rv2s_POST:
+  case ARM64::LD4Rv4s_POST:
+  case ARM64::LD4Rv2d_POST:
+  case ARM64::LD4Rv1d_POST:
+    DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  }
+  return Success;
+}
+
+static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn,
+                                             uint64_t Addr,
+                                             const void *Decoder) {
+  uint64_t Rt = fieldFromInstruction(insn, 0, 5);
+  uint64_t Rn = fieldFromInstruction(insn, 5, 5);
+  uint64_t Rm = fieldFromInstruction(insn, 16, 5);
+  uint64_t size = fieldFromInstruction(insn, 10, 2);
+  uint64_t S = fieldFromInstruction(insn, 12, 1);
+  uint64_t Q = fieldFromInstruction(insn, 30, 1);
+  uint64_t index = 0;
+
+  switch (Inst.getOpcode()) {
+  case ARM64::LD1i8:
+  case ARM64::LD1i8_POST:
+  case ARM64::LD2i8:
+  case ARM64::LD2i8_POST:
+  case ARM64::LD3i8_POST:
+  case ARM64::LD3i8:
+  case ARM64::LD4i8_POST:
+  case ARM64::LD4i8:
+    index = (Q << 3) | (S << 2) | size;
+    break;
+  case ARM64::LD1i16:
+  case ARM64::LD1i16_POST:
+  case ARM64::LD2i16:
+  case ARM64::LD2i16_POST:
+  case ARM64::LD3i16_POST:
+  case ARM64::LD3i16:
+  case ARM64::LD4i16_POST:
+  case ARM64::LD4i16:
+    index = (Q << 2) | (S << 1) | (size >> 1);
+    break;
+  case ARM64::LD1i32:
+  case ARM64::LD1i32_POST:
+  case ARM64::LD2i32:
+  case ARM64::LD2i32_POST:
+  case ARM64::LD3i32_POST:
+  case ARM64::LD3i32:
+  case ARM64::LD4i32_POST:
+  case ARM64::LD4i32:
+    index = (Q << 1) | S;
+    break;
+  case ARM64::LD1i64:
+  case ARM64::LD1i64_POST:
+  case ARM64::LD2i64:
+  case ARM64::LD2i64_POST:
+  case ARM64::LD3i64_POST:
+  case ARM64::LD3i64:
+  case ARM64::LD4i64_POST:
+  case ARM64::LD4i64:
+    index = Q;
+    break;
+  }
+
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case ARM64::LD1i8:
+  case ARM64::LD1i8_POST:
+  case ARM64::LD1i16:
+  case ARM64::LD1i16_POST:
+  case ARM64::LD1i32:
+  case ARM64::LD1i32_POST:
+  case ARM64::LD1i64:
+  case ARM64::LD1i64_POST:
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD2i8:
+  case ARM64::LD2i8_POST:
+  case ARM64::LD2i16:
+  case ARM64::LD2i16_POST:
+  case ARM64::LD2i32:
+  case ARM64::LD2i32_POST:
+  case ARM64::LD2i64:
+  case ARM64::LD2i64_POST:
+    DecodeQQRegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeQQRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD3i8:
+  case ARM64::LD3i8_POST:
+  case ARM64::LD3i16:
+  case ARM64::LD3i16_POST:
+  case ARM64::LD3i32:
+  case ARM64::LD3i32_POST:
+  case ARM64::LD3i64:
+  case ARM64::LD3i64_POST:
+    DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case ARM64::LD4i8:
+  case ARM64::LD4i8_POST:
+  case ARM64::LD4i16:
+  case ARM64::LD4i16_POST:
+  case ARM64::LD4i32:
+  case ARM64::LD4i32_POST:
+  case ARM64::LD4i64:
+  case ARM64::LD4i64_POST:
+    DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder);
+    DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  }
+
+  Inst.addOperand(MCOperand::CreateImm(index));
+  DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
+
+  switch (Inst.getOpcode()) {
+  case ARM64::LD1i8_POST:
+  case ARM64::LD1i16_POST:
+  case ARM64::LD1i32_POST:
+  case ARM64::LD1i64_POST:
+  case ARM64::LD2i8_POST:
+  case ARM64::LD2i16_POST:
+  case ARM64::LD2i32_POST:
+  case ARM64::LD2i64_POST:
+  case ARM64::LD3i8_POST:
+  case ARM64::LD3i16_POST:
+  case ARM64::LD3i32_POST:
+  case ARM64::LD3i64_POST:
+  case ARM64::LD4i8_POST:
+  case ARM64::LD4i16_POST:
+  case ARM64::LD4i32_POST:
+  case ARM64::LD4i64_POST:
+    DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+    break;
+  }
+  return Success;
+}

Added: llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.h (added)
+++ llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,54 @@
+//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM64DISASSEMBLER_H
+#define ARM64DISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+class ARM64Disassembler : public MCDisassembler {
+public:
+  ARM64Disassembler(const MCSubtargetInfo &STI) : MCDisassembler(STI) {}
+
+  ~ARM64Disassembler() {}
+
+  /// getInstruction - See MCDisassembler.
+  MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
+                                              const MemoryObject &region,
+                                              uint64_t address,
+                                              raw_ostream &vStream,
+                                              raw_ostream &cStream) const;
+
+  /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
+  /// operand in place of the immediate Value in the MCInst.  The immediate
+  /// Value has not had any PC adjustment made by the caller. If the instruction
+  /// adds the PC to the immediate Value then InstsAddsAddressToValue is true,
+  /// else false.  If the getOpInfo() function was set as part of the
+  /// setupForSymbolicDisassembly() call then that function is called to get any
+  /// symbolic information at the Address for this instrution.  If that returns
+  /// non-zero then the symbolic information it returns is used to create an
+  /// MCExpr and that is added as an operand to the MCInst.  This function
+  /// returns true if it adds an operand to the MCInst and false otherwise.
+  bool tryAddingSymbolicOperand(uint64_t Address, int Value,
+                                bool InstsAddsAddressToValue, uint64_t InstSize,
+                                MCInst &MI, uint32_t insn = 0) const;
+};
+
+} // namespace llvm
+
+#endif

Added: llvm/trunk/lib/Target/ARM64/Disassembler/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/Disassembler/CMakeLists.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/Disassembler/CMakeLists.txt (added)
+++ llvm/trunk/lib/Target/ARM64/Disassembler/CMakeLists.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,13 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARM64Disassembler
+  ARM64Disassembler.cpp
+  )
+# workaround for hanging compilation on MSVC8, 9 and 10
+#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+#set_property(
+#  SOURCE ARMDisassembler.cpp
+#  PROPERTY COMPILE_FLAGS "/Od"
+#  )
+#endif()
+add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen)

Added: llvm/trunk/lib/Target/ARM64/Disassembler/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/Disassembler/LLVMBuild.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/Disassembler/LLVMBuild.txt (added)
+++ llvm/trunk/lib/Target/ARM64/Disassembler/LLVMBuild.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,24 @@
+;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARM64Disassembler
+parent = ARM64
+required_libraries = ARM64Desc ARM64Info MC Support
+add_to_library_groups = ARM64
+

Added: llvm/trunk/lib/Target/ARM64/Disassembler/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/Disassembler/Makefile?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/Disassembler/Makefile (added)
+++ llvm/trunk/lib/Target/ARM64/Disassembler/Makefile Sat Mar 29 05:18:08 2014
@@ -0,0 +1,16 @@
+##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARM64Disassembler
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common

Added: llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp (added)
+++ llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp Sat Mar 29 05:18:08 2014
@@ -0,0 +1,1428 @@
+//===-- ARM64InstPrinter.cpp - Convert ARM64 MCInst to assembly syntax ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM64InstPrinter.h"
+#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "MCTargetDesc/ARM64BaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "ARM64GenAsmWriter.inc"
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "ARM64GenAsmWriter1.inc"
+
+ARM64InstPrinter::ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                                   const MCRegisterInfo &MRI,
+                                   const MCSubtargetInfo &STI)
+    : MCInstPrinter(MAI, MII, MRI) {
+  // Initialize the set of available features.
+  setAvailableFeatures(STI.getFeatureBits());
+}
+
+ARM64AppleInstPrinter::ARM64AppleInstPrinter(const MCAsmInfo &MAI,
+                                             const MCInstrInfo &MII,
+                                             const MCRegisterInfo &MRI,
+                                             const MCSubtargetInfo &STI)
+    : ARM64InstPrinter(MAI, MII, MRI, STI) {}
+
+void ARM64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  // This is for .cfi directives.
+  OS << getRegisterName(RegNo);
+}
+
+void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                 StringRef Annot) {
+  // Check for special encodings and print the cannonical alias instead.
+
+  unsigned Opcode = MI->getOpcode();
+
+  if (Opcode == ARM64::SYS || Opcode == ARM64::SYSxt)
+    if (printSysAlias(MI, O)) {
+      printAnnotation(O, Annot);
+      return;
+    }
+
+  // TBZ/TBNZ should print the register operand as a Wreg if the bit
+  // number is < 32.
+  if ((Opcode == ARM64::TBNZ || Opcode == ARM64::TBZ) &&
+      MI->getOperand(1).getImm() < 32) {
+    MCInst newMI = *MI;
+    unsigned Reg = MI->getOperand(0).getReg();
+    newMI.getOperand(0).setReg(getWRegFromXReg(Reg));
+    printInstruction(&newMI, O);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  // SBFM/UBFM should print to a nicer aliased form if possible.
+  if (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri ||
+      Opcode == ARM64::UBFMXri || Opcode == ARM64::UBFMWri) {
+    const MCOperand &Op0 = MI->getOperand(0);
+    const MCOperand &Op1 = MI->getOperand(1);
+    const MCOperand &Op2 = MI->getOperand(2);
+    const MCOperand &Op3 = MI->getOperand(3);
+
+    if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) {
+      bool IsSigned = (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri);
+      const char *AsmMnemonic = 0;
+
+      switch (Op3.getImm()) {
+      default:
+        break;
+      case 7:
+        AsmMnemonic = IsSigned ? "sxtb" : "uxtb";
+        break;
+      case 15:
+        AsmMnemonic = IsSigned ? "sxth" : "uxth";
+        break;
+      case 31:
+        AsmMnemonic = IsSigned ? "sxtw" : "uxtw";
+        break;
+      }
+
+      if (AsmMnemonic) {
+        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
+          << ", " << getRegisterName(Op1.getReg());
+        printAnnotation(O, Annot);
+        return;
+      }
+    }
+
+    // All immediate shifts are aliases, implemented using the Bitfield
+    // instruction. In all cases the immediate shift amount shift must be in
+    // the range 0 to (reg.size -1).
+    if (Op2.isImm() && Op3.isImm()) {
+      const char *AsmMnemonic = 0;
+      int shift = 0;
+      int64_t immr = Op2.getImm();
+      int64_t imms = Op3.getImm();
+      if (Opcode == ARM64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) {
+        AsmMnemonic = "lsl";
+        shift = 31 - imms;
+      } else if (Opcode == ARM64::UBFMXri && imms != 0x3f &&
+                 ((imms + 1 == immr))) {
+        AsmMnemonic = "lsl";
+        shift = 63 - imms;
+      } else if (Opcode == ARM64::UBFMWri && imms == 0x1f) {
+        AsmMnemonic = "lsr";
+        shift = immr;
+      } else if (Opcode == ARM64::UBFMXri && imms == 0x3f) {
+        AsmMnemonic = "lsr";
+        shift = immr;
+      } else if (Opcode == ARM64::SBFMWri && imms == 0x1f) {
+        AsmMnemonic = "asr";
+        shift = immr;
+      } else if (Opcode == ARM64::SBFMXri && imms == 0x3f) {
+        AsmMnemonic = "asr";
+        shift = immr;
+      }
+      if (AsmMnemonic) {
+        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
+          << ", " << getRegisterName(Op1.getReg()) << ", #" << shift;
+        printAnnotation(O, Annot);
+        return;
+      }
+    }
+  }
+
+  // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift
+  // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be
+  // printed.
+  if ((Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi ||
+       Opcode == ARM64::MOVNXi || Opcode == ARM64::MOVNWi) &&
+      MI->getOperand(1).isExpr()) {
+    if (Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi)
+      O << "\tmovz\t";
+    else
+      O << "\tmovn\t";
+
+    O << getRegisterName(MI->getOperand(0).getReg()) << ", #"
+      << *MI->getOperand(1).getExpr();
+    return;
+  }
+
+  if ((Opcode == ARM64::MOVKXi || Opcode == ARM64::MOVKWi) &&
+      MI->getOperand(2).isExpr()) {
+    O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
+      << *MI->getOperand(2).getExpr();
+    return;
+  }
+
+  // ANDS WZR, Wn, #imm ==> TST Wn, #imm
+  // ANDS XZR, Xn, #imm ==> TST Xn, #imm
+  if (Opcode == ARM64::ANDSWri && MI->getOperand(0).getReg() == ARM64::WZR) {
+    O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printLogicalImm32(MI, 2, O);
+    return;
+  }
+  if (Opcode == ARM64::ANDSXri && MI->getOperand(0).getReg() == ARM64::XZR) {
+    O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printLogicalImm64(MI, 2, O);
+    return;
+  }
+  // ANDS WZR, Wn, Wm{, lshift #imm} ==> TST Wn{, lshift #imm}
+  // ANDS XZR, Xn, Xm{, lshift #imm} ==> TST Xn{, lshift #imm}
+  if ((Opcode == ARM64::ANDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) ||
+      (Opcode == ARM64::ANDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) {
+    O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printShiftedRegister(MI, 2, O);
+    return;
+  }
+
+  // SUBS WZR, Wn, #imm ==> CMP Wn, #imm
+  // SUBS XZR, Xn, #imm ==> CMP Xn, #imm
+  if ((Opcode == ARM64::SUBSWri && MI->getOperand(0).getReg() == ARM64::WZR) ||
+      (Opcode == ARM64::SUBSXri && MI->getOperand(0).getReg() == ARM64::XZR)) {
+    O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printAddSubImm(MI, 2, O);
+    return;
+  }
+  // SUBS WZR, Wn, Wm{, lshift #imm} ==> CMP Wn, Wm{, lshift #imm}
+  // SUBS XZR, Xn, Xm{, lshift #imm} ==> CMP Xn, Xm{, lshift #imm}
+  if ((Opcode == ARM64::SUBSWrs && MI->getOperand(0).getReg() == ARM64::WZR) ||
+      (Opcode == ARM64::SUBSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) {
+    O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printShiftedRegister(MI, 2, O);
+    return;
+  }
+  // SUBS XZR, Xn, Wm, uxtb #imm ==> CMP Xn, uxtb #imm
+  // SUBS WZR, Wn, Xm, uxtb #imm ==> CMP Wn, uxtb #imm
+  if ((Opcode == ARM64::SUBSXrx && MI->getOperand(0).getReg() == ARM64::XZR) ||
+      (Opcode == ARM64::SUBSWrx && MI->getOperand(0).getReg() == ARM64::WZR)) {
+    O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printExtendedRegister(MI, 2, O);
+    return;
+  }
+  // SUBS XZR, Xn, Xm, uxtx #imm ==> CMP Xn, uxtb #imm
+  if (Opcode == ARM64::SUBSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) {
+    O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "
+      << getRegisterName(MI->getOperand(2).getReg());
+    printExtend(MI, 3, O);
+    return;
+  }
+
+  // ADDS WZR, Wn, #imm ==> CMN Wn, #imm
+  // ADDS XZR, Xn, #imm ==> CMN Xn, #imm
+  if ((Opcode == ARM64::ADDSWri && MI->getOperand(0).getReg() == ARM64::WZR) ||
+      (Opcode == ARM64::ADDSXri && MI->getOperand(0).getReg() == ARM64::XZR)) {
+    O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printAddSubImm(MI, 2, O);
+    return;
+  }
+  // ADDS WZR, Wn, Wm{, lshift #imm} ==> CMN Wn, Wm{, lshift #imm}
+  // ADDS XZR, Xn, Xm{, lshift #imm} ==> CMN Xn, Xm{, lshift #imm}
+  if ((Opcode == ARM64::ADDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) ||
+      (Opcode == ARM64::ADDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) {
+    O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printShiftedRegister(MI, 2, O);
+    return;
+  }
+  // ADDS XZR, Xn, Wm, uxtb #imm ==> CMN Xn, uxtb #imm
+  if (Opcode == ARM64::ADDSXrx && MI->getOperand(0).getReg() == ARM64::XZR) {
+    O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
+    printExtendedRegister(MI, 2, O);
+    return;
+  }
+  // ADDS XZR, Xn, Xm, uxtx #imm ==> CMN Xn, uxtb #imm
+  if (Opcode == ARM64::ADDSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) {
+    O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "
+      << getRegisterName(MI->getOperand(2).getReg());
+    printExtend(MI, 3, O);
+    return;
+  }
+
+  if (!printAliasInstr(MI, O))
+    printInstruction(MI, O);
+
+  printAnnotation(O, Annot);
+}
+
+static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout,
+                                bool &IsTbx) {
+  switch (Opcode) {
+  case ARM64::TBXv8i8One:
+  case ARM64::TBXv8i8Two:
+  case ARM64::TBXv8i8Three:
+  case ARM64::TBXv8i8Four:
+    IsTbx = true;
+    Layout = ".8b";
+    return true;
+  case ARM64::TBLv8i8One:
+  case ARM64::TBLv8i8Two:
+  case ARM64::TBLv8i8Three:
+  case ARM64::TBLv8i8Four:
+    IsTbx = false;
+    Layout = ".8b";
+    return true;
+  case ARM64::TBXv16i8One:
+  case ARM64::TBXv16i8Two:
+  case ARM64::TBXv16i8Three:
+  case ARM64::TBXv16i8Four:
+    IsTbx = true;
+    Layout = ".16b";
+    return true;
+  case ARM64::TBLv16i8One:
+  case ARM64::TBLv16i8Two:
+  case ARM64::TBLv16i8Three:
+  case ARM64::TBLv16i8Four:
+    IsTbx = false;
+    Layout = ".16b";
+    return true;
+  default:
+    return false;
+  }
+}
+
+struct LdStNInstrDesc {
+  unsigned Opcode;
+  const char *Mnemonic;
+  const char *Layout;
+  int LaneOperand;
+  int NaturalOffset;
+};
+
+static LdStNInstrDesc LdStNInstInfo[] = {
+  { ARM64::LD1i8,             "ld1",  ".b",     2, 0  },
+  { ARM64::LD1i16,            "ld1",  ".h",     2, 0  },
+  { ARM64::LD1i32,            "ld1",  ".s",     2, 0  },
+  { ARM64::LD1i64,            "ld1",  ".d",     2, 0  },
+  { ARM64::LD1i8_POST,        "ld1",  ".b",     2, 1  },
+  { ARM64::LD1i16_POST,       "ld1",  ".h",     2, 2  },
+  { ARM64::LD1i32_POST,       "ld1",  ".s",     2, 4  },
+  { ARM64::LD1i64_POST,       "ld1",  ".d",     2, 8  },
+  { ARM64::LD1Rv16b,          "ld1r", ".16b",   0, 0  },
+  { ARM64::LD1Rv8h,           "ld1r", ".8h",    0, 0  },
+  { ARM64::LD1Rv4s,           "ld1r", ".4s",    0, 0  },
+  { ARM64::LD1Rv2d,           "ld1r", ".2d",    0, 0  },
+  { ARM64::LD1Rv8b,           "ld1r", ".8b",    0, 0  },
+  { ARM64::LD1Rv4h,           "ld1r", ".4h",    0, 0  },
+  { ARM64::LD1Rv2s,           "ld1r", ".2s",    0, 0  },
+  { ARM64::LD1Rv1d,           "ld1r", ".1d",    0, 0  },
+  { ARM64::LD1Rv16b_POST,     "ld1r", ".16b",   0, 1  },
+  { ARM64::LD1Rv8h_POST,      "ld1r", ".8h",    0, 2  },
+  { ARM64::LD1Rv4s_POST,      "ld1r", ".4s",    0, 4  },
+  { ARM64::LD1Rv2d_POST,      "ld1r", ".2d",    0, 8  },
+  { ARM64::LD1Rv8b_POST,      "ld1r", ".8b",    0, 1  },
+  { ARM64::LD1Rv4h_POST,      "ld1r", ".4h",    0, 2  },
+  { ARM64::LD1Rv2s_POST,      "ld1r", ".2s",    0, 4  },
+  { ARM64::LD1Rv1d_POST,      "ld1r", ".1d",    0, 8  },
+  { ARM64::LD1Onev16b,        "ld1",  ".16b",   0, 0  },
+  { ARM64::LD1Onev8h,         "ld1",  ".8h",    0, 0  },
+  { ARM64::LD1Onev4s,         "ld1",  ".4s",    0, 0  },
+  { ARM64::LD1Onev2d,         "ld1",  ".2d",    0, 0  },
+  { ARM64::LD1Onev8b,         "ld1",  ".8b",    0, 0  },
+  { ARM64::LD1Onev4h,         "ld1",  ".4h",    0, 0  },
+  { ARM64::LD1Onev2s,         "ld1",  ".2s",    0, 0  },
+  { ARM64::LD1Onev1d,         "ld1",  ".1d",    0, 0  },
+  { ARM64::LD1Onev16b_POST,   "ld1",  ".16b",   0, 16 },
+  { ARM64::LD1Onev8h_POST,    "ld1",  ".8h",    0, 16 },
+  { ARM64::LD1Onev4s_POST,    "ld1",  ".4s",    0, 16 },
+  { ARM64::LD1Onev2d_POST,    "ld1",  ".2d",    0, 16 },
+  { ARM64::LD1Onev8b_POST,    "ld1",  ".8b",    0, 8  },
+  { ARM64::LD1Onev4h_POST,    "ld1",  ".4h",    0, 8  },
+  { ARM64::LD1Onev2s_POST,    "ld1",  ".2s",    0, 8  },
+  { ARM64::LD1Onev1d_POST,    "ld1",  ".1d",    0, 8  },
+  { ARM64::LD1Twov16b,        "ld1",  ".16b",   0, 0  },
+  { ARM64::LD1Twov8h,         "ld1",  ".8h",    0, 0  },
+  { ARM64::LD1Twov4s,         "ld1",  ".4s",    0, 0  },
+  { ARM64::LD1Twov2d,         "ld1",  ".2d",    0, 0  },
+  { ARM64::LD1Twov8b,         "ld1",  ".8b",    0, 0  },
+  { ARM64::LD1Twov4h,         "ld1",  ".4h",    0, 0  },
+  { ARM64::LD1Twov2s,         "ld1",  ".2s",    0, 0  },
+  { ARM64::LD1Twov1d,         "ld1",  ".1d",    0, 0  },
+  { ARM64::LD1Twov16b_POST,   "ld1",  ".16b",   0, 32 },
+  { ARM64::LD1Twov8h_POST,    "ld1",  ".8h",    0, 32 },
+  { ARM64::LD1Twov4s_POST,    "ld1",  ".4s",    0, 32 },
+  { ARM64::LD1Twov2d_POST,    "ld1",  ".2d",    0, 32 },
+  { ARM64::LD1Twov8b_POST,    "ld1",  ".8b",    0, 16 },
+  { ARM64::LD1Twov4h_POST,    "ld1",  ".4h",    0, 16 },
+  { ARM64::LD1Twov2s_POST,    "ld1",  ".2s",    0, 16 },
+  { ARM64::LD1Twov1d_POST,    "ld1",  ".1d",    0, 16 },
+  { ARM64::LD1Threev16b,      "ld1",  ".16b",   0, 0  },
+  { ARM64::LD1Threev8h,       "ld1",  ".8h",    0, 0  },
+  { ARM64::LD1Threev4s,       "ld1",  ".4s",    0, 0  },
+  { ARM64::LD1Threev2d,       "ld1",  ".2d",    0, 0  },
+  { ARM64::LD1Threev8b,       "ld1",  ".8b",    0, 0  },
+  { ARM64::LD1Threev4h,       "ld1",  ".4h",    0, 0  },
+  { ARM64::LD1Threev2s,       "ld1",  ".2s",    0, 0  },
+  { ARM64::LD1Threev1d,       "ld1",  ".1d",    0, 0  },
+  { ARM64::LD1Threev16b_POST, "ld1",  ".16b",   0, 48 },
+  { ARM64::LD1Threev8h_POST,  "ld1",  ".8h",    0, 48 },
+  { ARM64::LD1Threev4s_POST,  "ld1",  ".4s",    0, 48 },
+  { ARM64::LD1Threev2d_POST,  "ld1",  ".2d",    0, 48 },
+  { ARM64::LD1Threev8b_POST,  "ld1",  ".8b",    0, 24 },
+  { ARM64::LD1Threev4h_POST,  "ld1",  ".4h",    0, 24 },
+  { ARM64::LD1Threev2s_POST,  "ld1",  ".2s",    0, 24 },
+  { ARM64::LD1Threev1d_POST,  "ld1",  ".1d",    0, 24 },
+  { ARM64::LD1Fourv16b,       "ld1",  ".16b",   0, 0  },
+  { ARM64::LD1Fourv8h,        "ld1",  ".8h",    0, 0  },
+  { ARM64::LD1Fourv4s,        "ld1",  ".4s",    0, 0  },
+  { ARM64::LD1Fourv2d,        "ld1",  ".2d",    0, 0  },
+  { ARM64::LD1Fourv8b,        "ld1",  ".8b",    0, 0  },
+  { ARM64::LD1Fourv4h,        "ld1",  ".4h",    0, 0  },
+  { ARM64::LD1Fourv2s,        "ld1",  ".2s",    0, 0  },
+  { ARM64::LD1Fourv1d,        "ld1",  ".1d",    0, 0  },
+  { ARM64::LD1Fourv16b_POST,  "ld1",  ".16b",   0, 64 },
+  { ARM64::LD1Fourv8h_POST,   "ld1",  ".8h",    0, 64 },
+  { ARM64::LD1Fourv4s_POST,   "ld1",  ".4s",    0, 64 },
+  { ARM64::LD1Fourv2d_POST,   "ld1",  ".2d",    0, 64 },
+  { ARM64::LD1Fourv8b_POST,   "ld1",  ".8b",    0, 32 },
+  { ARM64::LD1Fourv4h_POST,   "ld1",  ".4h",    0, 32 },
+  { ARM64::LD1Fourv2s_POST,   "ld1",  ".2s",    0, 32 },
+  { ARM64::LD1Fourv1d_POST,   "ld1",  ".1d",    0, 32 },
+  { ARM64::LD2i8,             "ld2",  ".b",     2, 0  },
+  { ARM64::LD2i16,            "ld2",  ".h",     2, 0  },
+  { ARM64::LD2i32,            "ld2",  ".s",     2, 0  },
+  { ARM64::LD2i64,            "ld2",  ".d",     2, 0  },
+  { ARM64::LD2i8_POST,        "ld2",  ".b",     2, 2  },
+  { ARM64::LD2i16_POST,       "ld2",  ".h",     2, 4  },
+  { ARM64::LD2i32_POST,       "ld2",  ".s",     2, 8  },
+  { ARM64::LD2i64_POST,       "ld2",  ".d",     2, 16  },
+  { ARM64::LD2Rv16b,          "ld2r", ".16b",   0, 0  },
+  { ARM64::LD2Rv8h,           "ld2r", ".8h",    0, 0  },
+  { ARM64::LD2Rv4s,           "ld2r", ".4s",    0, 0  },
+  { ARM64::LD2Rv2d,           "ld2r", ".2d",    0, 0  },
+  { ARM64::LD2Rv8b,           "ld2r", ".8b",    0, 0  },
+  { ARM64::LD2Rv4h,           "ld2r", ".4h",    0, 0  },
+  { ARM64::LD2Rv2s,           "ld2r", ".2s",    0, 0  },
+  { ARM64::LD2Rv1d,           "ld2r", ".1d",    0, 0  },
+  { ARM64::LD2Rv16b_POST,     "ld2r", ".16b",   0, 2  },
+  { ARM64::LD2Rv8h_POST,      "ld2r", ".8h",    0, 4  },
+  { ARM64::LD2Rv4s_POST,      "ld2r", ".4s",    0, 8  },
+  { ARM64::LD2Rv2d_POST,      "ld2r", ".2d",    0, 16 },
+  { ARM64::LD2Rv8b_POST,      "ld2r", ".8b",    0, 2  },
+  { ARM64::LD2Rv4h_POST,      "ld2r", ".4h",    0, 4  },
+  { ARM64::LD2Rv2s_POST,      "ld2r", ".2s",    0, 8  },
+  { ARM64::LD2Rv1d_POST,      "ld2r", ".1d",    0, 16 },
+  { ARM64::LD2Twov16b,        "ld2",  ".16b",   0, 0  },
+  { ARM64::LD2Twov8h,         "ld2",  ".8h",    0, 0  },
+  { ARM64::LD2Twov4s,         "ld2",  ".4s",    0, 0  },
+  { ARM64::LD2Twov2d,         "ld2",  ".2d",    0, 0  },
+  { ARM64::LD2Twov8b,         "ld2",  ".8b",    0, 0  },
+  { ARM64::LD2Twov4h,         "ld2",  ".4h",    0, 0  },
+  { ARM64::LD2Twov2s,         "ld2",  ".2s",    0, 0  },
+  { ARM64::LD2Twov16b_POST,   "ld2",  ".16b",   0, 32 },
+  { ARM64::LD2Twov8h_POST,    "ld2",  ".8h",    0, 32 },
+  { ARM64::LD2Twov4s_POST,    "ld2",  ".4s",    0, 32 },
+  { ARM64::LD2Twov2d_POST,    "ld2",  ".2d",    0, 32 },
+  { ARM64::LD2Twov8b_POST,    "ld2",  ".8b",    0, 16 },
+  { ARM64::LD2Twov4h_POST,    "ld2",  ".4h",    0, 16 },
+  { ARM64::LD2Twov2s_POST,    "ld2",  ".2s",    0, 16 },
+  { ARM64::LD3i8,             "ld3",  ".b",     2, 0  },
+  { ARM64::LD3i16,            "ld3",  ".h",     2, 0  },
+  { ARM64::LD3i32,            "ld3",  ".s",     2, 0  },
+  { ARM64::LD3i64,            "ld3",  ".d",     2, 0  },
+  { ARM64::LD3i8_POST,        "ld3",  ".b",     2, 3  },
+  { ARM64::LD3i16_POST,       "ld3",  ".h",     2, 6  },
+  { ARM64::LD3i32_POST,       "ld3",  ".s",     2, 12  },
+  { ARM64::LD3i64_POST,       "ld3",  ".d",     2, 24  },
+  { ARM64::LD3Rv16b,          "ld3r", ".16b",   0, 0  },
+  { ARM64::LD3Rv8h,           "ld3r", ".8h",    0, 0  },
+  { ARM64::LD3Rv4s,           "ld3r", ".4s",    0, 0  },
+  { ARM64::LD3Rv2d,           "ld3r", ".2d",    0, 0  },
+  { ARM64::LD3Rv8b,           "ld3r", ".8b",    0, 0  },
+  { ARM64::LD3Rv4h,           "ld3r", ".4h",    0, 0  },
+  { ARM64::LD3Rv2s,           "ld3r", ".2s",    0, 0  },
+  { ARM64::LD3Rv1d,           "ld3r", ".1d",    0, 0  },
+  { ARM64::LD3Rv16b_POST,     "ld3r", ".16b",   0, 3  },
+  { ARM64::LD3Rv8h_POST,      "ld3r", ".8h",    0, 6  },
+  { ARM64::LD3Rv4s_POST,      "ld3r", ".4s",    0, 12 },
+  { ARM64::LD3Rv2d_POST,      "ld3r", ".2d",    0, 24 },
+  { ARM64::LD3Rv8b_POST,      "ld3r", ".8b",    0, 3  },
+  { ARM64::LD3Rv4h_POST,      "ld3r", ".4h",    0, 6  },
+  { ARM64::LD3Rv2s_POST,      "ld3r", ".2s",    0, 12 },
+  { ARM64::LD3Rv1d_POST,      "ld3r", ".1d",    0, 24 },
+  { ARM64::LD3Threev16b,      "ld3",  ".16b",   0, 0  },
+  { ARM64::LD3Threev8h,       "ld3",  ".8h",    0, 0  },
+  { ARM64::LD3Threev4s,       "ld3",  ".4s",    0, 0  },
+  { ARM64::LD3Threev2d,       "ld3",  ".2d",    0, 0  },
+  { ARM64::LD3Threev8b,       "ld3",  ".8b",    0, 0  },
+  { ARM64::LD3Threev4h,       "ld3",  ".4h",    0, 0  },
+  { ARM64::LD3Threev2s,       "ld3",  ".2s",    0, 0  },
+  { ARM64::LD3Threev16b_POST, "ld3",  ".16b",   0, 48 },
+  { ARM64::LD3Threev8h_POST,  "ld3",  ".8h",    0, 48 },
+  { ARM64::LD3Threev4s_POST,  "ld3",  ".4s",    0, 48 },
+  { ARM64::LD3Threev2d_POST,  "ld3",  ".2d",    0, 48 },
+  { ARM64::LD3Threev8b_POST,  "ld3",  ".8b",    0, 24 },
+  { ARM64::LD3Threev4h_POST,  "ld3",  ".4h",    0, 24 },
+  { ARM64::LD3Threev2s_POST,  "ld3",  ".2s",    0, 24 },
+  { ARM64::LD4i8,             "ld4",  ".b",     2, 0  },
+  { ARM64::LD4i16,            "ld4",  ".h",     2, 0  },
+  { ARM64::LD4i32,            "ld4",  ".s",     2, 0  },
+  { ARM64::LD4i64,            "ld4",  ".d",     2, 0  },
+  { ARM64::LD4i8_POST,        "ld4",  ".b",     2, 4  },
+  { ARM64::LD4i16_POST,       "ld4",  ".h",     2, 8  },
+  { ARM64::LD4i32_POST,       "ld4",  ".s",     2, 16 },
+  { ARM64::LD4i64_POST,       "ld4",  ".d",     2, 32 },
+  { ARM64::LD4Rv16b,          "ld4r", ".16b",   0, 0  },
+  { ARM64::LD4Rv8h,           "ld4r", ".8h",    0, 0  },
+  { ARM64::LD4Rv4s,           "ld4r", ".4s",    0, 0  },
+  { ARM64::LD4Rv2d,           "ld4r", ".2d",    0, 0  },
+  { ARM64::LD4Rv8b,           "ld4r", ".8b",    0, 0  },
+  { ARM64::LD4Rv4h,           "ld4r", ".4h",    0, 0  },
+  { ARM64::LD4Rv2s,           "ld4r", ".2s",    0, 0  },
+  { ARM64::LD4Rv1d,           "ld4r", ".1d",    0, 0  },
+  { ARM64::LD4Rv16b_POST,     "ld4r", ".16b",   0, 4  },
+  { ARM64::LD4Rv8h_POST,      "ld4r", ".8h",    0, 8  },
+  { ARM64::LD4Rv4s_POST,      "ld4r", ".4s",    0, 16 },
+  { ARM64::LD4Rv2d_POST,      "ld4r", ".2d",    0, 32 },
+  { ARM64::LD4Rv8b_POST,      "ld4r", ".8b",    0, 4  },
+  { ARM64::LD4Rv4h_POST,      "ld4r", ".4h",    0, 8  },
+  { ARM64::LD4Rv2s_POST,      "ld4r", ".2s",    0, 16 },
+  { ARM64::LD4Rv1d_POST,      "ld4r", ".1d",    0, 32 },
+  { ARM64::LD4Fourv16b,       "ld4",  ".16b",   0, 0  },
+  { ARM64::LD4Fourv8h,        "ld4",  ".8h",    0, 0  },
+  { ARM64::LD4Fourv4s,        "ld4",  ".4s",    0, 0  },
+  { ARM64::LD4Fourv2d,        "ld4",  ".2d",    0, 0  },
+  { ARM64::LD4Fourv8b,        "ld4",  ".8b",    0, 0  },
+  { ARM64::LD4Fourv4h,        "ld4",  ".4h",    0, 0  },
+  { ARM64::LD4Fourv2s,        "ld4",  ".2s",    0, 0  },
+  { ARM64::LD4Fourv16b_POST,  "ld4",  ".16b",   0, 64 },
+  { ARM64::LD4Fourv8h_POST,   "ld4",  ".8h",    0, 64 },
+  { ARM64::LD4Fourv4s_POST,   "ld4",  ".4s",    0, 64 },
+  { ARM64::LD4Fourv2d_POST,   "ld4",  ".2d",    0, 64 },
+  { ARM64::LD4Fourv8b_POST,   "ld4",  ".8b",    0, 32 },
+  { ARM64::LD4Fourv4h_POST,   "ld4",  ".4h",    0, 32 },
+  { ARM64::LD4Fourv2s_POST,   "ld4",  ".2s",    0, 32 },
+  { ARM64::ST1i8,             "st1",  ".b",     1, 0  },
+  { ARM64::ST1i16,            "st1",  ".h",     1, 0  },
+  { ARM64::ST1i32,            "st1",  ".s",     1, 0  },
+  { ARM64::ST1i64,            "st1",  ".d",     1, 0  },
+  { ARM64::ST1i8_POST,        "st1",  ".b",     1, 1  },
+  { ARM64::ST1i16_POST,       "st1",  ".h",     1, 2  },
+  { ARM64::ST1i32_POST,       "st1",  ".s",     1, 4  },
+  { ARM64::ST1i64_POST,       "st1",  ".d",     1, 8  },
+  { ARM64::ST1Onev16b,        "st1",  ".16b",   0, 0  },
+  { ARM64::ST1Onev8h,         "st1",  ".8h",    0, 0  },
+  { ARM64::ST1Onev4s,         "st1",  ".4s",    0, 0  },
+  { ARM64::ST1Onev2d,         "st1",  ".2d",    0, 0  },
+  { ARM64::ST1Onev8b,         "st1",  ".8b",    0, 0  },
+  { ARM64::ST1Onev4h,         "st1",  ".4h",    0, 0  },
+  { ARM64::ST1Onev2s,         "st1",  ".2s",    0, 0  },
+  { ARM64::ST1Onev1d,         "st1",  ".1d",    0, 0  },
+  { ARM64::ST1Onev16b_POST,   "st1",  ".16b",   0, 16 },
+  { ARM64::ST1Onev8h_POST,    "st1",  ".8h",    0, 16 },
+  { ARM64::ST1Onev4s_POST,    "st1",  ".4s",    0, 16 },
+  { ARM64::ST1Onev2d_POST,    "st1",  ".2d",    0, 16 },
+  { ARM64::ST1Onev8b_POST,    "st1",  ".8b",    0, 8  },
+  { ARM64::ST1Onev4h_POST,    "st1",  ".4h",    0, 8  },
+  { ARM64::ST1Onev2s_POST,    "st1",  ".2s",    0, 8  },
+  { ARM64::ST1Onev1d_POST,    "st1",  ".1d",    0, 8  },
+  { ARM64::ST1Twov16b,        "st1",  ".16b",   0, 0  },
+  { ARM64::ST1Twov8h,         "st1",  ".8h",    0, 0  },
+  { ARM64::ST1Twov4s,         "st1",  ".4s",    0, 0  },
+  { ARM64::ST1Twov2d,         "st1",  ".2d",    0, 0  },
+  { ARM64::ST1Twov8b,         "st1",  ".8b",    0, 0  },
+  { ARM64::ST1Twov4h,         "st1",  ".4h",    0, 0  },
+  { ARM64::ST1Twov2s,         "st1",  ".2s",    0, 0  },
+  { ARM64::ST1Twov1d,         "st1",  ".1d",    0, 0  },
+  { ARM64::ST1Twov16b_POST,   "st1",  ".16b",   0, 32 },
+  { ARM64::ST1Twov8h_POST,    "st1",  ".8h",    0, 32 },
+  { ARM64::ST1Twov4s_POST,    "st1",  ".4s",    0, 32 },
+  { ARM64::ST1Twov2d_POST,    "st1",  ".2d",    0, 32 },
+  { ARM64::ST1Twov8b_POST,    "st1",  ".8b",    0, 16 },
+  { ARM64::ST1Twov4h_POST,    "st1",  ".4h",    0, 16 },
+  { ARM64::ST1Twov2s_POST,    "st1",  ".2s",    0, 16 },
+  { ARM64::ST1Twov1d_POST,    "st1",  ".1d",    0, 16 },
+  { ARM64::ST1Threev16b,      "st1",  ".16b",   0, 0  },
+  { ARM64::ST1Threev8h,       "st1",  ".8h",    0, 0  },
+  { ARM64::ST1Threev4s,       "st1",  ".4s",    0, 0  },
+  { ARM64::ST1Threev2d,       "st1",  ".2d",    0, 0  },
+  { ARM64::ST1Threev8b,       "st1",  ".8b",    0, 0  },
+  { ARM64::ST1Threev4h,       "st1",  ".4h",    0, 0  },
+  { ARM64::ST1Threev2s,       "st1",  ".2s",    0, 0  },
+  { ARM64::ST1Threev1d,       "st1",  ".1d",    0, 0  },
+  { ARM64::ST1Threev16b_POST, "st1",  ".16b",   0, 48 },
+  { ARM64::ST1Threev8h_POST,  "st1",  ".8h",    0, 48 },
+  { ARM64::ST1Threev4s_POST,  "st1",  ".4s",    0, 48 },
+  { ARM64::ST1Threev2d_POST,  "st1",  ".2d",    0, 48 },
+  { ARM64::ST1Threev8b_POST,  "st1",  ".8b",    0, 24 },
+  { ARM64::ST1Threev4h_POST,  "st1",  ".4h",    0, 24 },
+  { ARM64::ST1Threev2s_POST,  "st1",  ".2s",    0, 24 },
+  { ARM64::ST1Threev1d_POST,  "st1",  ".1d",    0, 24 },
+  { ARM64::ST1Fourv16b,       "st1",  ".16b",   0, 0  },
+  { ARM64::ST1Fourv8h,        "st1",  ".8h",    0, 0  },
+  { ARM64::ST1Fourv4s,        "st1",  ".4s",    0, 0  },
+  { ARM64::ST1Fourv2d,        "st1",  ".2d",    0, 0  },
+  { ARM64::ST1Fourv8b,        "st1",  ".8b",    0, 0  },
+  { ARM64::ST1Fourv4h,        "st1",  ".4h",    0, 0  },
+  { ARM64::ST1Fourv2s,        "st1",  ".2s",    0, 0  },
+  { ARM64::ST1Fourv1d,        "st1",  ".1d",    0, 0  },
+  { ARM64::ST1Fourv16b_POST,  "st1",  ".16b",   0, 64 },
+  { ARM64::ST1Fourv8h_POST,   "st1",  ".8h",    0, 64 },
+  { ARM64::ST1Fourv4s_POST,   "st1",  ".4s",    0, 64 },
+  { ARM64::ST1Fourv2d_POST,   "st1",  ".2d",    0, 64 },
+  { ARM64::ST1Fourv8b_POST,   "st1",  ".8b",    0, 32 },
+  { ARM64::ST1Fourv4h_POST,   "st1",  ".4h",    0, 32 },
+  { ARM64::ST1Fourv2s_POST,   "st1",  ".2s",    0, 32 },
+  { ARM64::ST1Fourv1d_POST,   "st1",  ".1d",    0, 32 },
+  { ARM64::ST2i8,             "st2",  ".b",     1, 0  },
+  { ARM64::ST2i16,            "st2",  ".h",     1, 0  },
+  { ARM64::ST2i32,            "st2",  ".s",     1, 0  },
+  { ARM64::ST2i64,            "st2",  ".d",     1, 0  },
+  { ARM64::ST2i8_POST,        "st2",  ".b",     1, 2  },
+  { ARM64::ST2i16_POST,       "st2",  ".h",     1, 4  },
+  { ARM64::ST2i32_POST,       "st2",  ".s",     1, 8  },
+  { ARM64::ST2i64_POST,       "st2",  ".d",     1, 16 },
+  { ARM64::ST2Twov16b,        "st2",  ".16b",   0, 0  },
+  { ARM64::ST2Twov8h,         "st2",  ".8h",    0, 0  },
+  { ARM64::ST2Twov4s,         "st2",  ".4s",    0, 0  },
+  { ARM64::ST2Twov2d,         "st2",  ".2d",    0, 0  },
+  { ARM64::ST2Twov8b,         "st2",  ".8b",    0, 0  },
+  { ARM64::ST2Twov4h,         "st2",  ".4h",    0, 0  },
+  { ARM64::ST2Twov2s,         "st2",  ".2s",    0, 0  },
+  { ARM64::ST2Twov16b_POST,   "st2",  ".16b",   0, 32 },
+  { ARM64::ST2Twov8h_POST,    "st2",  ".8h",    0, 32 },
+  { ARM64::ST2Twov4s_POST,    "st2",  ".4s",    0, 32 },
+  { ARM64::ST2Twov2d_POST,    "st2",  ".2d",    0, 32 },
+  { ARM64::ST2Twov8b_POST,    "st2",  ".8b",    0, 16 },
+  { ARM64::ST2Twov4h_POST,    "st2",  ".4h",    0, 16 },
+  { ARM64::ST2Twov2s_POST,    "st2",  ".2s",    0, 16 },
+  { ARM64::ST3i8,             "st3",  ".b",     1, 0  },
+  { ARM64::ST3i16,            "st3",  ".h",     1, 0  },
+  { ARM64::ST3i32,            "st3",  ".s",     1, 0  },
+  { ARM64::ST3i64,            "st3",  ".d",     1, 0  },
+  { ARM64::ST3i8_POST,        "st3",  ".b",     1, 3  },
+  { ARM64::ST3i16_POST,       "st3",  ".h",     1, 6  },
+  { ARM64::ST3i32_POST,       "st3",  ".s",     1, 12 },
+  { ARM64::ST3i64_POST,       "st3",  ".d",     1, 24 },
+  { ARM64::ST3Threev16b,      "st3",  ".16b",   0, 0  },
+  { ARM64::ST3Threev8h,       "st3",  ".8h",    0, 0  },
+  { ARM64::ST3Threev4s,       "st3",  ".4s",    0, 0  },
+  { ARM64::ST3Threev2d,       "st3",  ".2d",    0, 0  },
+  { ARM64::ST3Threev8b,       "st3",  ".8b",    0, 0  },
+  { ARM64::ST3Threev4h,       "st3",  ".4h",    0, 0  },
+  { ARM64::ST3Threev2s,       "st3",  ".2s",    0, 0  },
+  { ARM64::ST3Threev16b_POST, "st3",  ".16b",   0, 48 },
+  { ARM64::ST3Threev8h_POST,  "st3",  ".8h",    0, 48 },
+  { ARM64::ST3Threev4s_POST,  "st3",  ".4s",    0, 48 },
+  { ARM64::ST3Threev2d_POST,  "st3",  ".2d",    0, 48 },
+  { ARM64::ST3Threev8b_POST,  "st3",  ".8b",    0, 24 },
+  { ARM64::ST3Threev4h_POST,  "st3",  ".4h",    0, 24 },
+  { ARM64::ST3Threev2s_POST,  "st3",  ".2s",    0, 24 },
+  { ARM64::ST4i8,             "st4",  ".b",     1, 0  },
+  { ARM64::ST4i16,            "st4",  ".h",     1, 0  },
+  { ARM64::ST4i32,            "st4",  ".s",     1, 0  },
+  { ARM64::ST4i64,            "st4",  ".d",     1, 0  },
+  { ARM64::ST4i8_POST,        "st4",  ".b",     1, 4  },
+  { ARM64::ST4i16_POST,       "st4",  ".h",     1, 8  },
+  { ARM64::ST4i32_POST,       "st4",  ".s",     1, 16 },
+  { ARM64::ST4i64_POST,       "st4",  ".d",     1, 32 },
+  { ARM64::ST4Fourv16b,       "st4",  ".16b",   0, 0  },
+  { ARM64::ST4Fourv8h,        "st4",  ".8h",    0, 0  },
+  { ARM64::ST4Fourv4s,        "st4",  ".4s",    0, 0  },
+  { ARM64::ST4Fourv2d,        "st4",  ".2d",    0, 0  },
+  { ARM64::ST4Fourv8b,        "st4",  ".8b",    0, 0  },
+  { ARM64::ST4Fourv4h,        "st4",  ".4h",    0, 0  },
+  { ARM64::ST4Fourv2s,        "st4",  ".2s",    0, 0  },
+  { ARM64::ST4Fourv16b_POST,  "st4",  ".16b",   0, 64 },
+  { ARM64::ST4Fourv8h_POST,   "st4",  ".8h",    0, 64 },
+  { ARM64::ST4Fourv4s_POST,   "st4",  ".4s",    0, 64 },
+  { ARM64::ST4Fourv2d_POST,   "st4",  ".2d",    0, 64 },
+  { ARM64::ST4Fourv8b_POST,   "st4",  ".8b",    0, 32 },
+  { ARM64::ST4Fourv4h_POST,   "st4",  ".4h",    0, 32 },
+  { ARM64::ST4Fourv2s_POST,   "st4",  ".2s",    0, 32 },
+};
+
+static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
+  unsigned Idx;
+  for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
+    if (LdStNInstInfo[Idx].Opcode == Opcode)
+      return &LdStNInstInfo[Idx];
+
+  return 0;
+}
+
+void ARM64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                      StringRef Annot) {
+  unsigned Opcode = MI->getOpcode();
+  StringRef Layout, Mnemonic;
+
+  bool IsTbx;
+  if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) {
+    O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t'
+      << getRegisterName(MI->getOperand(0).getReg(), ARM64::vreg) << ", ";
+
+    unsigned ListOpNum = IsTbx ? 2 : 1;
+    printVectorList(MI, ListOpNum, O, "");
+
+    O << ", "
+      << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), ARM64::vreg);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
+    O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t';
+
+    // Now onto the operands: first a vector list with possible lane
+    // specifier. E.g. { v0 }[2]
+    printVectorList(MI, 0, O, "");
+
+    if (LdStDesc->LaneOperand != 0)
+      O << '[' << MI->getOperand(LdStDesc->LaneOperand).getImm() << ']';
+
+    // Next the address: [xN]
+    unsigned AddrOpNum = LdStDesc->LaneOperand + 1;
+    unsigned AddrReg = MI->getOperand(AddrOpNum).getReg();
+    O << ", [" << getRegisterName(AddrReg) << ']';
+
+    // Finally, there might be a post-indexed offset.
+    if (LdStDesc->NaturalOffset != 0) {
+      unsigned Reg = MI->getOperand(AddrOpNum + 1).getReg();
+      if (Reg != ARM64::XZR)
+        O << ", " << getRegisterName(Reg);
+      else {
+        assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?");
+        O << ", #" << LdStDesc->NaturalOffset;
+      }
+    }
+
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  ARM64InstPrinter::printInst(MI, O, Annot);
+}
+
+bool ARM64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
+#ifndef NDEBUG
+  unsigned Opcode = MI->getOpcode();
+  assert((Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) &&
+         "Invalid opcode for SYS alias!");
+#endif
+
+  const char *Asm = 0;
+  const MCOperand &Op1 = MI->getOperand(0);
+  const MCOperand &Cn = MI->getOperand(1);
+  const MCOperand &Cm = MI->getOperand(2);
+  const MCOperand &Op2 = MI->getOperand(3);
+
+  unsigned Op1Val = Op1.getImm();
+  unsigned CnVal = Cn.getImm();
+  unsigned CmVal = Cm.getImm();
+  unsigned Op2Val = Op2.getImm();
+
+  if (CnVal == 7) {
+    switch (CmVal) {
+    default:
+      break;
+
+    // IC aliases
+    case 1:
+      if (Op1Val == 0 && Op2Val == 0)
+        Asm = "ic\tialluis";
+      break;
+    case 5:
+      if (Op1Val == 0 && Op2Val == 0)
+        Asm = "ic\tiallu";
+      else if (Op1Val == 3 && Op2Val == 1)
+        Asm = "ic\tivau";
+      break;
+
+    // DC aliases
+    case 4:
+      if (Op1Val == 3 && Op2Val == 1)
+        Asm = "dc\tzva";
+      break;
+    case 6:
+      if (Op1Val == 0 && Op2Val == 1)
+        Asm = "dc\tivac";
+      if (Op1Val == 0 && Op2Val == 2)
+        Asm = "dc\tisw";
+      break;
+    case 10:
+      if (Op1Val == 3 && Op2Val == 1)
+        Asm = "dc\tcvac";
+      else if (Op1Val == 0 && Op2Val == 2)
+        Asm = "dc\tcsw";
+      break;
+    case 11:
+      if (Op1Val == 3 && Op2Val == 1)
+        Asm = "dc\tcvau";
+      break;
+    case 14:
+      if (Op1Val == 3 && Op2Val == 1)
+        Asm = "dc\tcivac";
+      else if (Op1Val == 0 && Op2Val == 2)
+        Asm = "dc\tcisw";
+      break;
+
+    // AT aliases
+    case 8:
+      switch (Op1Val) {
+      default:
+        break;
+      case 0:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "at\ts1e1r"; break;
+        case 1: Asm = "at\ts1e1w"; break;
+        case 2: Asm = "at\ts1e0r"; break;
+        case 3: Asm = "at\ts1e0w"; break;
+        }
+        break;
+      case 4:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "at\ts1e2r"; break;
+        case 1: Asm = "at\ts1e2w"; break;
+        case 4: Asm = "at\ts12e1r"; break;
+        case 5: Asm = "at\ts12e1w"; break;
+        case 6: Asm = "at\ts12e0r"; break;
+        case 7: Asm = "at\ts12e0w"; break;
+        }
+        break;
+      case 6:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "at\ts1e3r"; break;
+        case 1: Asm = "at\ts1e3w"; break;
+        }
+        break;
+      }
+      break;
+    }
+  } else if (CnVal == 8) {
+    // TLBI aliases
+    switch (CmVal) {
+    default:
+      break;
+    case 3:
+      switch (Op1Val) {
+      default:
+        break;
+      case 0:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "tlbi\tvmalle1is"; break;
+        case 1: Asm = "tlbi\tvae1is"; break;
+        case 2: Asm = "tlbi\taside1is"; break;
+        case 3: Asm = "tlbi\tvaae1is"; break;
+        case 5: Asm = "tlbi\tvale1is"; break;
+        case 7: Asm = "tlbi\tvaale1is"; break;
+        }
+        break;
+      case 4:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "tlbi\talle2is"; break;
+        case 1: Asm = "tlbi\tvae2is"; break;
+        case 4: Asm = "tlbi\talle1is"; break;
+        case 5: Asm = "tlbi\tvale2is"; break;
+        case 6: Asm = "tlbi\tvmalls12e1is"; break;
+        }
+        break;
+      case 6:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "tlbi\talle3is"; break;
+        case 1: Asm = "tlbi\tvae3is"; break;
+        case 5: Asm = "tlbi\tvale3is"; break;
+        }
+        break;
+      }
+      break;
+    case 4:
+      switch (Op1Val) {
+      default:
+        break;
+      case 4:
+        switch (Op2Val) {
+        default:
+          break;
+        case 1: Asm = "tlbi\tipas2e1"; break;
+        case 5: Asm = "tlbi\tipas2le1"; break;
+        }
+        break;
+      }
+      break;
+    case 7:
+      switch (Op1Val) {
+      default:
+        break;
+      case 0:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "tlbi\tvmalle1"; break;
+        case 1: Asm = "tlbi\tvae1"; break;
+        case 2: Asm = "tlbi\taside1"; break;
+        case 3: Asm = "tlbi\tvaae1"; break;
+        case 5: Asm = "tlbi\tvale1"; break;
+        case 7: Asm = "tlbi\tvaale1"; break;
+        }
+        break;
+      case 4:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "tlbi\talle2"; break;
+        case 1: Asm = "tlbi\tvae2"; break;
+        case 4: Asm = "tlbi\talle1"; break;
+        case 5: Asm = "tlbi\tvale2"; break;
+        case 6: Asm = "tlbi\tvmalls12e1"; break;
+        }
+        break;
+      case 6:
+        switch (Op2Val) {
+        default:
+          break;
+        case 0: Asm = "tlbi\talle3"; break;
+        case 1: Asm = "tlbi\tvae3";  break;
+        case 5: Asm = "tlbi\tvale3"; break;
+        }
+        break;
+      }
+      break;
+    }
+  }
+
+  if (Asm) {
+    O << '\t' << Asm;
+    if (MI->getNumOperands() == 5)
+      O << ", " << getRegisterName(MI->getOperand(4).getReg());
+  }
+
+  return Asm != 0;
+}
+
+void ARM64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    unsigned Reg = Op.getReg();
+    O << getRegisterName(Reg);
+  } else if (Op.isImm()) {
+    O << '#' << Op.getImm();
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << *Op.getExpr();
+  }
+}
+
+void ARM64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
+                                           unsigned Imm, raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    unsigned Reg = Op.getReg();
+    if (Reg == ARM64::XZR)
+      O << "#" << Imm;
+    else
+      O << getRegisterName(Reg);
+  } else
+    assert("unknown operand kind in printPostIncOperand64");
+}
+
+void ARM64InstPrinter::printPostIncOperand1(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 1, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand2(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 2, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand3(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 3, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand4(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 4, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand6(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 6, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand8(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 8, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand12(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 12, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand16(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 16, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand24(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 24, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand32(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 32, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand48(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 48, O);
+}
+
+void ARM64InstPrinter::printPostIncOperand64(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  printPostIncOperand(MI, OpNo, 64, O);
+}
+
+void ARM64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  assert(Op.isReg() && "Non-register vreg operand!");
+  unsigned Reg = Op.getReg();
+  O << getRegisterName(Reg, ARM64::vreg);
+}
+
+void ARM64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
+  O << "c" << Op.getImm();
+}
+
+void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  if (MO.isImm()) {
+    unsigned Val = (MO.getImm() & 0xfff);
+    assert(Val == MO.getImm() && "Add/sub immediate out of range!");
+    unsigned Shift =
+        ARM64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
+    O << '#' << (Val << Shift);
+    // Distinguish "0, lsl #12" from "0, lsl #0".
+    if (Val == 0 && Shift != 0)
+      printShifter(MI, OpNum + 1, O);
+  } else {
+    assert(MO.isExpr() && "Unexpected operand type!");
+    O << *MO.getExpr();
+    printShifter(MI, OpNum + 1, O);
+  }
+}
+
+void ARM64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
+                                         raw_ostream &O) {
+  uint64_t Val = MI->getOperand(OpNum).getImm();
+  O << "#0x";
+  O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 32));
+}
+
+void ARM64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
+                                         raw_ostream &O) {
+  uint64_t Val = MI->getOperand(OpNum).getImm();
+  O << "#0x";
+  O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 64));
+}
+
+void ARM64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+                                    raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNum).getImm();
+  // LSL #0 should not be printed.
+  if (ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
+      ARM64_AM::getShiftValue(Val) == 0)
+    return;
+  O << ", " << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #"
+    << ARM64_AM::getShiftValue(Val);
+}
+
+void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+                                            raw_ostream &O) {
+  O << getRegisterName(MI->getOperand(OpNum).getReg());
+  printShifter(MI, OpNum + 1, O);
+}
+
+void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+                                             raw_ostream &O) {
+  O << getRegisterName(MI->getOperand(OpNum).getReg());
+  printExtend(MI, OpNum + 1, O);
+}
+
+void ARM64InstPrinter::printExtend(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNum).getImm();
+  ARM64_AM::ExtendType ExtType = ARM64_AM::getArithExtendType(Val);
+  unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val);
+
+  // If the destination or first source register operand is [W]SP, print
+  // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at
+  // all.
+  if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::UXTX) {
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src1 = MI->getOperand(1).getReg();
+    if (Dest == ARM64::SP || Dest == ARM64::WSP || Src1 == ARM64::SP ||
+        Src1 == ARM64::WSP) {
+      if (ShiftVal != 0)
+        O << ", lsl #" << ShiftVal;
+      return;
+    }
+  }
+  O << ", " << ARM64_AM::getExtendName(ExtType);
+  if (ShiftVal != 0)
+    O << " #" << ShiftVal;
+}
+
+void ARM64InstPrinter::printDotCondCode(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O) {
+  ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm();
+  if (CC != ARM64CC::AL)
+    O << '.' << ARM64CC::getCondCodeName(CC);
+}
+
+void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+                                     raw_ostream &O) {
+  ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm();
+  O << ARM64CC::getCondCodeName(CC);
+}
+
+void ARM64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
+}
+
+void ARM64InstPrinter::printImmScale4(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  O << '#' << 4 * MI->getOperand(OpNum).getImm();
+}
+
+void ARM64InstPrinter::printImmScale8(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  O << '#' << 8 * MI->getOperand(OpNum).getImm();
+}
+
+void ARM64InstPrinter::printImmScale16(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  O << '#' << 16 * MI->getOperand(OpNum).getImm();
+}
+
+void ARM64InstPrinter::printAMIndexed(const MCInst *MI, unsigned OpNum,
+                                      unsigned Scale, raw_ostream &O) {
+  const MCOperand MO1 = MI->getOperand(OpNum + 1);
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg());
+  if (MO1.isImm()) {
+    if (MO1.getImm() != 0)
+      O << ", #" << (MO1.getImm() * Scale);
+  } else {
+    assert(MO1.isExpr() && "Unexpected operand type!");
+    O << ", " << *MO1.getExpr();
+  }
+  O << ']';
+}
+
+void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  unsigned prfop = MI->getOperand(OpNum).getImm();
+  if (ARM64_AM::isNamedPrefetchOp(prfop))
+    O << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)prfop);
+  else
+    O << '#' << prfop;
+}
+
+void ARM64InstPrinter::printMemoryPostIndexed32(const MCInst *MI,
+                                                unsigned OpNum,
+                                                raw_ostream &O) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
+    << 4 * MI->getOperand(OpNum + 1).getImm();
+}
+
+void ARM64InstPrinter::printMemoryPostIndexed64(const MCInst *MI,
+                                                unsigned OpNum,
+                                                raw_ostream &O) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
+    << 8 * MI->getOperand(OpNum + 1).getImm();
+}
+
+void ARM64InstPrinter::printMemoryPostIndexed128(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 raw_ostream &O) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
+    << 16 * MI->getOperand(OpNum + 1).getImm();
+}
+
+void ARM64InstPrinter::printMemoryPostIndexed(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
+    << MI->getOperand(OpNum + 1).getImm();
+}
+
+void ARM64InstPrinter::printMemoryRegOffset(const MCInst *MI, unsigned OpNum,
+                                            raw_ostream &O, int LegalShiftAmt) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+    << getRegisterName(MI->getOperand(OpNum + 1).getReg());
+
+  unsigned Val = MI->getOperand(OpNum + 2).getImm();
+  ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val);
+  bool DoShift = ARM64_AM::getMemDoShift(Val);
+
+  if (ExtType == ARM64_AM::UXTX) {
+    if (DoShift)
+      O << ", lsl";
+  } else
+    O << ", " << ARM64_AM::getExtendName(ExtType);
+
+  if (DoShift)
+    O << " #" << LegalShiftAmt;
+
+  O << "]";
+}
+
+void ARM64InstPrinter::printMemoryRegOffset8(const MCInst *MI, unsigned OpNum,
+                                             raw_ostream &O) {
+  printMemoryRegOffset(MI, OpNum, O, 0);
+}
+
+void ARM64InstPrinter::printMemoryRegOffset16(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  printMemoryRegOffset(MI, OpNum, O, 1);
+}
+
+void ARM64InstPrinter::printMemoryRegOffset32(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  printMemoryRegOffset(MI, OpNum, O, 2);
+}
+
+void ARM64InstPrinter::printMemoryRegOffset64(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  printMemoryRegOffset(MI, OpNum, O, 3);
+}
+
+void ARM64InstPrinter::printMemoryRegOffset128(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
+  printMemoryRegOffset(MI, OpNum, O, 4);
+}
+
+void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+                                         raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  O << '#';
+  if (MO.isFPImm())
+    // FIXME: Should this ever happen?
+    O << MO.getFPImm();
+  else
+    O << ARM64_AM::getFPImmFloat(MO.getImm());
+}
+
+static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
+  while (Stride--) {
+    switch (Reg) {
+    default:
+      assert(0 && "Vector register expected!");
+    case ARM64::Q0:  Reg = ARM64::Q1;  break;
+    case ARM64::Q1:  Reg = ARM64::Q2;  break;
+    case ARM64::Q2:  Reg = ARM64::Q3;  break;
+    case ARM64::Q3:  Reg = ARM64::Q4;  break;
+    case ARM64::Q4:  Reg = ARM64::Q5;  break;
+    case ARM64::Q5:  Reg = ARM64::Q6;  break;
+    case ARM64::Q6:  Reg = ARM64::Q7;  break;
+    case ARM64::Q7:  Reg = ARM64::Q8;  break;
+    case ARM64::Q8:  Reg = ARM64::Q9;  break;
+    case ARM64::Q9:  Reg = ARM64::Q10; break;
+    case ARM64::Q10: Reg = ARM64::Q11; break;
+    case ARM64::Q11: Reg = ARM64::Q12; break;
+    case ARM64::Q12: Reg = ARM64::Q13; break;
+    case ARM64::Q13: Reg = ARM64::Q14; break;
+    case ARM64::Q14: Reg = ARM64::Q15; break;
+    case ARM64::Q15: Reg = ARM64::Q16; break;
+    case ARM64::Q16: Reg = ARM64::Q17; break;
+    case ARM64::Q17: Reg = ARM64::Q18; break;
+    case ARM64::Q18: Reg = ARM64::Q19; break;
+    case ARM64::Q19: Reg = ARM64::Q20; break;
+    case ARM64::Q20: Reg = ARM64::Q21; break;
+    case ARM64::Q21: Reg = ARM64::Q22; break;
+    case ARM64::Q22: Reg = ARM64::Q23; break;
+    case ARM64::Q23: Reg = ARM64::Q24; break;
+    case ARM64::Q24: Reg = ARM64::Q25; break;
+    case ARM64::Q25: Reg = ARM64::Q26; break;
+    case ARM64::Q26: Reg = ARM64::Q27; break;
+    case ARM64::Q27: Reg = ARM64::Q28; break;
+    case ARM64::Q28: Reg = ARM64::Q29; break;
+    case ARM64::Q29: Reg = ARM64::Q30; break;
+    case ARM64::Q30: Reg = ARM64::Q31; break;
+    // Vector lists can wrap around.
+    case ARM64::Q31:
+      Reg = ARM64::Q0;
+      break;
+    }
+  }
+  return Reg;
+}
+
+void ARM64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O, StringRef LayoutSuffix) {
+  unsigned Reg = MI->getOperand(OpNum).getReg();
+
+  O << "{ ";
+
+  // Work out how many registers there are in the list (if there is an actual
+  // list).
+  unsigned NumRegs = 1;
+  if (MRI.getRegClass(ARM64::DDRegClassID).contains(Reg) ||
+      MRI.getRegClass(ARM64::QQRegClassID).contains(Reg))
+    NumRegs = 2;
+  else if (MRI.getRegClass(ARM64::DDDRegClassID).contains(Reg) ||
+           MRI.getRegClass(ARM64::QQQRegClassID).contains(Reg))
+    NumRegs = 3;
+  else if (MRI.getRegClass(ARM64::DDDDRegClassID).contains(Reg) ||
+           MRI.getRegClass(ARM64::QQQQRegClassID).contains(Reg))
+    NumRegs = 4;
+
+  // Now forget about the list and find out what the first register is.
+  if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::dsub0))
+    Reg = FirstReg;
+  else if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::qsub0))
+    Reg = FirstReg;
+
+  // If it's a D-reg, we need to promote it to the equivalent Q-reg before
+  // printing (otherwise getRegisterName fails).
+  if (MRI.getRegClass(ARM64::FPR64RegClassID).contains(Reg)) {
+    const MCRegisterClass &FPR128RC = MRI.getRegClass(ARM64::FPR128RegClassID);
+    Reg = MRI.getMatchingSuperReg(Reg, ARM64::dsub, &FPR128RC);
+  }
+
+  for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
+    O << getRegisterName(Reg, ARM64::vreg) << LayoutSuffix;
+    if (i + 1 != NumRegs)
+      O << ", ";
+  }
+
+  O << " }";
+}
+
+void ARM64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
+                                                      unsigned OpNum,
+                                                      raw_ostream &O) {
+  printVectorList(MI, OpNum, O, "");
+}
+
+template <unsigned NumLanes, char LaneKind>
+void ARM64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+                                            raw_ostream &O) {
+  Twine Suffix;
+  if (NumLanes)
+    Suffix = Twine('.') + Twine(NumLanes) + Twine(LaneKind);
+  else
+    Suffix = Twine('.') + Twine(LaneKind);
+
+  SmallString<8> Buf;
+  printVectorList(MI, OpNum, O, Suffix.toStringRef(Buf));
+}
+
+void ARM64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O) {
+  O << "[" << MI->getOperand(OpNum).getImm() << "]";
+}
+
+void ARM64InstPrinter::printAlignedBranchTarget(const MCInst *MI,
+                                                unsigned OpNum,
+                                                raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+
+  // If the label has already been resolved to an immediate offset (say, when
+  // we're running the disassembler), just print the immediate.
+  if (Op.isImm()) {
+    O << "#" << (Op.getImm() << 2);
+    return;
+  }
+
+  // If the branch target is simply an address then print it in hex.
+  const MCConstantExpr *BranchTarget =
+      dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr());
+  int64_t Address;
+  if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+    O << "0x";
+    O.write_hex(Address);
+  } else {
+    // Otherwise, just print the expression.
+    O << *MI->getOperand(OpNum).getExpr();
+  }
+}
+
+void ARM64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+
+  // If the label has already been resolved to an immediate offset (say, when
+  // we're running the disassembler), just print the immediate.
+  if (Op.isImm()) {
+    O << "#" << (Op.getImm() << 12);
+    return;
+  }
+
+  // Otherwise, just print the expression.
+  O << *MI->getOperand(OpNum).getExpr();
+}
+
+void ARM64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNo).getImm();
+  const char *Name = ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)Val);
+  if (Name)
+    O << Name;
+  else
+    O << "#" << Val;
+}
+
+void ARM64InstPrinter::printSystemRegister(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNo).getImm();
+  const char *Name =
+      ARM64SYS::getSystemRegisterName((ARM64SYS::SystemRegister)Val);
+  if (Name) {
+    O << Name;
+    return;
+  }
+
+  unsigned Op0 = 2 | ((Val >> 14) & 1);
+  unsigned Op1 = (Val >> 11) & 7;
+  unsigned CRn = (Val >> 7) & 0xf;
+  unsigned CRm = (Val >> 3) & 0xf;
+  unsigned Op2 = Val & 7;
+
+  O << 'S' << Op0 << '_' << Op1 << "_C" << CRn << "_C" << CRm << '_' << Op2;
+}
+
+void ARM64InstPrinter::printSystemCPSRField(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNo).getImm();
+  const char *Name = ARM64SYS::getCPSRFieldName((ARM64SYS::CPSRField)Val);
+  O << Name;
+}
+
+void ARM64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
+                                              raw_ostream &O) {
+  unsigned RawVal = MI->getOperand(OpNo).getImm();
+  uint64_t Val = ARM64_AM::decodeAdvSIMDModImmType10(RawVal);
+  O << format("#%#016lx", Val);
+}

Added: llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h (added)
+++ llvm/trunk/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,157 @@
+//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM64INSTPRINTER_H
+#define ARM64INSTPRINTER_H
+
+#include "MCTargetDesc/ARM64MCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class ARM64InstPrinter : public MCInstPrinter {
+public:
+  ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                   const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+  virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+  virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+  // Autogenerated by tblgen.
+  virtual void printInstruction(const MCInst *MI, raw_ostream &O);
+  virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+  virtual StringRef getRegName(unsigned RegNo) const {
+    return getRegisterName(RegNo);
+  }
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = ARM64::NoRegAltName);
+
+protected:
+  bool printSysAlias(const MCInst *MI, raw_ostream &O);
+  // Operand printers
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
+                           raw_ostream &O);
+  void printPostIncOperand1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand2(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand3(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand4(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand6(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand8(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand12(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand16(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand24(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand32(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand48(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPostIncOperand64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printDotCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAlignedBranchTarget(const MCInst *MI, unsigned OpNum,
+                                raw_ostream &O);
+  void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale,
+                      raw_ostream &O);
+  void printAMIndexed128(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printAMIndexed(MI, OpNum, 16, O);
+  }
+
+  void printAMIndexed64(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printAMIndexed(MI, OpNum, 8, O);
+  }
+
+  void printAMIndexed32(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printAMIndexed(MI, OpNum, 4, O);
+  }
+
+  void printAMIndexed16(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printAMIndexed(MI, OpNum, 2, O);
+  }
+
+  void printAMIndexed8(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printAMIndexed(MI, OpNum, 1, O);
+  }
+  void printAMUnscaled(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printAMIndexed(MI, OpNum, 1, O);
+  }
+  void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printImmScale4(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printImmScale8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printImmScale16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMemoryPostIndexed32(const MCInst *MI, unsigned OpNum,
+                                raw_ostream &O);
+  void printMemoryPostIndexed64(const MCInst *MI, unsigned OpNum,
+                                raw_ostream &O);
+  void printMemoryPostIndexed128(const MCInst *MI, unsigned OpNum,
+                                 raw_ostream &O);
+  void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+                            int LegalShiftAmt);
+  void printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMemoryRegOffset128(const MCInst *MI, unsigned OpNum,
+                               raw_ostream &O);
+
+  void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+  void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+                       StringRef LayoutSuffix);
+
+  /// Print a list of vector registers where the type suffix is implicit
+  /// (i.e. attached to the instruction rather than the registers).
+  void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O);
+
+  template <unsigned NumLanes, char LaneKind>
+  void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+  void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printSystemCPSRField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
+
+class ARM64AppleInstPrinter : public ARM64InstPrinter {
+public:
+  ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                        const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+  virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+  virtual void printInstruction(const MCInst *MI, raw_ostream &O);
+  virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+  virtual StringRef getRegName(unsigned RegNo) const {
+    return getRegisterName(RegNo);
+  }
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = ARM64::NoRegAltName);
+};
+}
+
+#endif

Added: llvm/trunk/lib/Target/ARM64/InstPrinter/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/InstPrinter/CMakeLists.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/InstPrinter/CMakeLists.txt (added)
+++ llvm/trunk/lib/Target/ARM64/InstPrinter/CMakeLists.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARM64AsmPrinter
+  ARM64InstPrinter.cpp
+  )
+
+add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen)

Added: llvm/trunk/lib/Target/ARM64/InstPrinter/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/InstPrinter/LLVMBuild.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/InstPrinter/LLVMBuild.txt (added)
+++ llvm/trunk/lib/Target/ARM64/InstPrinter/LLVMBuild.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,24 @@
+;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARM64AsmPrinter
+parent = ARM64
+required_libraries = MC Support
+add_to_library_groups = ARM64
+

Added: llvm/trunk/lib/Target/ARM64/InstPrinter/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/InstPrinter/Makefile?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/InstPrinter/Makefile (added)
+++ llvm/trunk/lib/Target/ARM64/InstPrinter/Makefile Sat Mar 29 05:18:08 2014
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARM64AsmPrinter
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common

Added: llvm/trunk/lib/Target/ARM64/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/LLVMBuild.txt?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/LLVMBuild.txt (added)
+++ llvm/trunk/lib/Target/ARM64/LLVMBuild.txt Sat Mar 29 05:18:08 2014
@@ -0,0 +1,36 @@
+;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = ARM64
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = ARM64CodeGen
+parent = ARM64
+required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = ARM64
+

Added: llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h?rev=205090&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h (added)
+++ llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h Sat Mar 29 05:18:08 2014
@@ -0,0 +1,759 @@
+//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM64 addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
+#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM64_AM - ARM64 Addressing Mode Stuff
+namespace ARM64_AM {
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//
+
+enum ShiftType {
+  InvalidShift = -1,
+  LSL = 0,
+  LSR = 1,
+  ASR = 2,
+  ROR = 3,
+  MSL = 4
+};
+
+/// getShiftName - Get the string encoding for the shift type.
+static inline const char *getShiftName(ARM64_AM::ShiftType ST) {
+  switch (ST) {
+  default: assert(false && "unhandled shift type!");
+  case ARM64_AM::LSL: return "lsl";
+  case ARM64_AM::LSR: return "lsr";
+  case ARM64_AM::ASR: return "asr";
+  case ARM64_AM::ROR: return "ror";
+  case ARM64_AM::MSL: return "msl";
+  }
+  return 0;
+}
+
+/// getShiftType - Extract the shift type.
+static inline ARM64_AM::ShiftType getShiftType(unsigned Imm) {
+  return ARM64_AM::ShiftType((Imm >> 6) & 0x7);
+}
+
+/// getShiftValue - Extract the shift value.
+static inline unsigned getShiftValue(unsigned Imm) {
+  return Imm & 0x3f;
+}
+
+/// getShifterImm - Encode the shift type and amount:
+///   imm:     6-bit shift amount
+///   shifter: 000 ==> lsl
+///            001 ==> lsr
+///            010 ==> asr
+///            011 ==> ror
+///            100 ==> msl
+///   {8-6}  = shifter
+///   {5-0}  = imm
+static inline unsigned getShifterImm(ARM64_AM::ShiftType ST, unsigned Imm) {
+  assert((Imm & 0x3f) == Imm && "Illegal shifted immedate value!");
+  return (unsigned(ST) << 6) | (Imm & 0x3f);
+}
+
+//===----------------------------------------------------------------------===//
+// Extends
+//
+
+enum ExtendType {
+  InvalidExtend = -1,
+  UXTB = 0,
+  UXTH = 1,
+  UXTW = 2,
+  UXTX = 3,
+  SXTB = 4,
+  SXTH = 5,
+  SXTW = 6,
+  SXTX = 7
+};
+
+/// getExtendName - Get the string encoding for the extend type.
+static inline const char *getExtendName(ARM64_AM::ExtendType ET) {
+  switch (ET) {
+  default: assert(false && "unhandled extend type!");
+  case ARM64_AM::UXTB: return "uxtb";
+  case ARM64_AM::UXTH: return "uxth";
+  case ARM64_AM::UXTW: return "uxtw";
+  case ARM64_AM::UXTX: return "uxtx";
+  case ARM64_AM::SXTB: return "sxtb";
+  case ARM64_AM::SXTH: return "sxth";
+  case ARM64_AM::SXTW: return "sxtw";
+  case ARM64_AM::SXTX: return "sxtx";
+  }
+  return 0;
+}
+
+/// getArithShiftValue - get the arithmetic shift value.
+static inline unsigned getArithShiftValue(unsigned Imm) {
+  return Imm & 0x7;
+}
+
+/// getExtendType - Extract the extend type for operands of arithmetic ops.
+static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) {
+  return ARM64_AM::ExtendType((Imm >> 3) & 0x7);
+}
+
+/// getArithExtendImm - Encode the extend type and shift amount for an
+///                     arithmetic instruction:
+///   imm:     3-bit extend amount
+///   shifter: 000 ==> uxtb
+///            001 ==> uxth
+///            010 ==> uxtw
+///            011 ==> uxtx
+///            100 ==> sxtb
+///            101 ==> sxth
+///            110 ==> sxtw
+///            111 ==> sxtx
+///   {5-3}  = shifter
+///   {2-0}  = imm3
+static inline unsigned getArithExtendImm(ARM64_AM::ExtendType ET,
+                                         unsigned Imm) {
+  assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!");
+  return (unsigned(ET) << 3) | (Imm & 0x7);
+}
+
+/// getMemDoShift - Extract the "do shift" flag value for load/store
+/// instructions.
+static inline bool getMemDoShift(unsigned Imm) {
+  return (Imm & 0x1) != 0;
+}
+
+/// getExtendType - Extract the extend type for the offset operand of
+/// loads/stores.
+static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) {
+  return ARM64_AM::ExtendType((Imm >> 1) & 0x7);
+}
+
+/// getExtendImm - Encode the extend type and amount for a load/store inst:
+///   imm:     3-bit extend amount
+///   shifter: 000 ==> uxtb
+///            001 ==> uxth
+///            010 ==> uxtw
+///            011 ==> uxtx
+///            100 ==> sxtb
+///            101 ==> sxth
+///            110 ==> sxtw
+///            111 ==> sxtx
+///   {3-1}  = shifter
+///   {0}  = imm3
+static inline unsigned getMemExtendImm(ARM64_AM::ExtendType ET, bool Imm) {
+  assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!");
+  return (unsigned(ET) << 1) | (Imm & 0x7);
+}
+
+//===----------------------------------------------------------------------===//
+// Prefetch
+//
+
+/// Pre-fetch operator names.
+/// The enum values match the encoding values:
+///   prfop<4:3> 00=preload data, 10=prepare for store
+///   prfop<2:1> 00=target L1 cache, 01=target L2 cache, 10=target L3 cache,
+///   prfop<0> 0=non-streaming (temporal), 1=streaming (non-temporal)
+enum PrefetchOp {
+  InvalidPrefetchOp = -1,
+  PLDL1KEEP = 0x00,
+  PLDL1STRM = 0x01,
+  PLDL2KEEP = 0x02,
+  PLDL2STRM = 0x03,
+  PLDL3KEEP = 0x04,
+  PLDL3STRM = 0x05,
+  PSTL1KEEP = 0x10,
+  PSTL1STRM = 0x11,
+  PSTL2KEEP = 0x12,
+  PSTL2STRM = 0x13,
+  PSTL3KEEP = 0x14,
+  PSTL3STRM = 0x15
+};
+
+/// isNamedPrefetchOp - Check if the prefetch-op 5-bit value has a name.
+static inline bool isNamedPrefetchOp(unsigned prfop) {
+  switch (prfop) {
+  default: return false;
+  case ARM64_AM::PLDL1KEEP: case ARM64_AM::PLDL1STRM: case ARM64_AM::PLDL2KEEP:
+  case ARM64_AM::PLDL2STRM: case ARM64_AM::PLDL3KEEP: case ARM64_AM::PLDL3STRM:
+  case ARM64_AM::PSTL1KEEP: case ARM64_AM::PSTL1STRM: case ARM64_AM::PSTL2KEEP:
+  case ARM64_AM::PSTL2STRM: case ARM64_AM::PSTL3KEEP: case ARM64_AM::PSTL3STRM:
+    return true;
+  }
+}
+
+
+/// getPrefetchOpName - Get the string encoding for the prefetch operator.
+static inline const char *getPrefetchOpName(ARM64_AM::PrefetchOp prfop) {
+  switch (prfop) {
+  default: assert(false && "unhandled prefetch-op type!");
+  case ARM64_AM::PLDL1KEEP: return "pldl1keep";
+  case ARM64_AM::PLDL1STRM: return "pldl1strm";
+  case ARM64_AM::PLDL2KEEP: return "pldl2keep";
+  case ARM64_AM::PLDL2STRM: return "pldl2strm";
+  case ARM64_AM::PLDL3KEEP: return "pldl3keep";
+  case ARM64_AM::PLDL3STRM: return "pldl3strm";
+  case ARM64_AM::PSTL1KEEP: return "pstl1keep";
+  case ARM64_AM::PSTL1STRM: return "pstl1strm";
+  case ARM64_AM::PSTL2KEEP: return "pstl2keep";
+  case ARM64_AM::PSTL2STRM: return "pstl2strm";
+  case ARM64_AM::PSTL3KEEP: return "pstl3keep";
+  case ARM64_AM::PSTL3STRM: return "pstl3strm";
+  }
+  return 0;
+}
+
+static inline uint64_t ror(uint64_t elt, unsigned size) {
+  return ((elt & 1) << (size-1)) | (elt >> 1);
+}
+
+/// processLogicalImmediate - Determine if an immediate value can be encoded
+/// as the immediate operand of a logical instruction for the given register
+/// size.  If so, return true with "encoding" set to the encoded value in
+/// the form N:immr:imms.
+static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize,
+                                           uint64_t &encoding) {
+  if (imm == 0ULL || imm == ~0ULL ||
+      (regSize != 64 && (imm >> regSize != 0 || imm == ~0U)))
+    return false;
+
+  unsigned size = 2;
+  uint64_t eltVal = imm;
+
+  // First, determine the element size.
+  while (size < regSize) {
+    unsigned numElts = regSize / size;
+    unsigned mask = (1ULL << size) - 1;
+    uint64_t lowestEltVal = imm & mask;
+
+    bool allMatched = true;
+    for (unsigned i = 1; i < numElts; ++i) {
+     uint64_t currEltVal = (imm >> (i*size)) & mask;
+      if (currEltVal != lowestEltVal) {
+        allMatched = false;
+        break;
+      }
+    }
+
+    if (allMatched) {
+      eltVal = lowestEltVal;
+      break;
+    }
+
+    size *= 2;
+  }
+
+  // Second, determine the rotation to make the element be: 0^m 1^n.
+  for (unsigned i = 0; i < size; ++i) {
+    eltVal = ror(eltVal, size);
+    uint32_t clz = countLeadingZeros(eltVal) - (64 - size);
+    uint32_t cto = CountTrailingOnes_64(eltVal);
+
+    if (clz + cto == size) {
+      // Encode in immr the number of RORs it would take to get *from* this
+      // element value to our target value, where i+1 is the number of RORs
+      // to go the opposite direction.
+      unsigned immr = size - (i + 1);
+
+      // If size has a 1 in the n'th bit, create a value that has zeroes in
+      // bits [0, n] and ones above that.
+      uint64_t nimms = ~(size-1) << 1;
+
+      // Or the CTO value into the low bits, which must be below the Nth bit
+      // bit mentioned above.
+      nimms |= (cto-1);
+
+      // Extract the seventh bit and toggle it to create the N field.
+      unsigned N = ((nimms >> 6) & 1) ^ 1;
+
+      encoding = (N << 12) | (immr << 6) | (nimms & 0x3f);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// isLogicalImmediate - Return true if the immediate is valid for a logical
+/// immediate instruction of the given register size. Return false otherwise.
+static inline bool isLogicalImmediate(uint64_t imm, unsigned regSize) {
+  uint64_t encoding;
+  return processLogicalImmediate(imm, regSize, encoding);
+}
+
+/// encodeLogicalImmediate - Return the encoded immediate value for a logical
+/// immediate instruction of the given register size.
+static inline uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize) {
+  uint64_t encoding = 0;
+  bool res = processLogicalImmediate(imm, regSize, encoding);
+  assert(res && "invalid logical immediate");
+  (void)res;
+  return encoding;
+}
+
+/// decodeLogicalImmediate - Decode a logical immediate value in the form
+/// "N:immr:imms" (where the immr and imms fields are each 6 bits) into the
+/// integer value it represents with regSize bits.
+static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) {
+  // Extract the N, imms, and immr fields.
+  unsigned N = (val >> 12) & 1;
+  unsigned immr = (val >> 6) & 0x3f;
+  unsigned imms = val & 0x3f;
+
+  assert((regSize == 64 || N == 0) && "undefined logical immediate encoding");
+  int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
+  assert(len >= 0 && "undefined logical immediate encoding");
+  unsigned size = (1 << len);
+  unsigned R = immr & (size - 1);
+  unsigned S = imms & (size - 1);
+  assert(S != size - 1 && "undefined logical immediate encoding");
+  uint64_t pattern = (1ULL << (S + 1)) - 1;
+  for (unsigned i = 0; i &