[llvm] [AArch64] Update latencies for Cortex-A510 scheduling model (PR #87293)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 1 16:16:57 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Usman Nadeem (UsmanNadeem)
<details>
<summary>Changes</summary>
Updated the Cortex-A510 scheduling model latencies according to the Software Optimization Guide for the Arm® Cortex®-A510 Core, Revision r1p3, Issue 6.0.
---
Patch is 1.42 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87293.diff
206 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedA510.td (+76-71)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/aarch64-addv.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll (+20-20)
- (modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/active_lane_mask.ll (+86-83)
- (modified) llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll (+7-7)
- (modified) llvm/test/CodeGen/AArch64/arm64-vabs.ll (+53-53)
- (modified) llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll (+33-31)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll (+32-31)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll (+13-12)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll (+2-1)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll (+32-32)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll (+13-13)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/extbinopload.ll (+61-60)
- (modified) llvm/test/CodeGen/AArch64/fcmp.ll (+54-54)
- (modified) llvm/test/CodeGen/AArch64/fdiv-combine.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/fp-veclib-expansion.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/funnel-shift-rot.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/icmp.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/insert-extend.ll (+27-27)
- (modified) llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll (+35-35)
- (modified) llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll (+46-45)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+566-566)
- (modified) llvm/test/CodeGen/AArch64/ldexp.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll (+22-23)
- (modified) llvm/test/CodeGen/AArch64/load-insert-zero.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/logic-shift.ll (+24-24)
- (modified) llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll (+38-38)
- (modified) llvm/test/CodeGen/AArch64/neon-dotreduce.ll (+379-379)
- (modified) llvm/test/CodeGen/AArch64/neon-extadd.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/neon-shift-neg.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/predicated-add-sub.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll (+129-133)
- (modified) llvm/test/CodeGen/AArch64/rcpc3-sve.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/reassocmls.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/reduce-shuffle.ll (+178-178)
- (modified) llvm/test/CodeGen/AArch64/sat-add.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sext.ll (+18-18)
- (modified) llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll (+65-65)
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll (+21-21)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll (+139-166)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll (+68-95)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll (+27-27)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll (+73-88)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll (+41-48)
- (modified) llvm/test/CodeGen/AArch64/split-vector-insert.ll (+173-92)
- (modified) llvm/test/CodeGen/AArch64/sve-abd.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/sve-bitcast.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll (+58-58)
- (modified) llvm/test/CodeGen/AArch64/sve-cmp-folds.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-doublereduct.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-expand-div.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-element.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll (+8-10)
- (modified) llvm/test/CodeGen/AArch64/sve-fcmp.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-fcopysign.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/sve-fcvt.ll (+32-32)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll (+4-9)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll (+56-56)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll (+117-117)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll (+111-111)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll (+28-28)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll (+28-28)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll (+156-156)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll (+74-74)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll (+78-78)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll (+21-21)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll (+13-13)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll (+42-42)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-fp-reciprocal.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll (+134-134)
- (modified) llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll (+23-23)
- (modified) llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/sve-hadd.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-insert-element.ll (+44-44)
- (modified) llvm/test/CodeGen/AArch64/sve-insert-vector.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll (+17-17)
- (modified) llvm/test/CodeGen/AArch64/sve-int-arith.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/sve-int-reduce.ll (+31-31)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll (+19-19)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/sve-ld1r.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-scatter-legalize.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-scatter.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-pr62151.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-pred-arith.ll (+20-20)
- (modified) llvm/test/CodeGen/AArch64/sve-pred-selectop.ll (+54-54)
- (modified) llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll (+38-38)
- (modified) llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-redundant-store.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll (+21-21)
- (modified) llvm/test/CodeGen/AArch64/sve-split-fcvt.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll (+13-13)
- (modified) llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-split-load.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-split-store.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-stepvector.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll (+18-18)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+13-13)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll (+24-24)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll (+22-22)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll (+21-21)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll (+58-60)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll (+21-21)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll (+7-7)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+17-17)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll (+20-20)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+32-32)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+29-29)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll (+39-39)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll (+7-7)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll (+177-177)
- (modified) llvm/test/CodeGen/AArch64/sve-trunc.ll (+19-17)
- (modified) llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll (+22-22)
- (modified) llvm/test/CodeGen/AArch64/sve-uunpklo-load-uzp1-store-combine.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve2-fcopysign.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve2-rsh.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve2-xar.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll (+32-32)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll (+32-32)
- (modified) llvm/test/CodeGen/AArch64/uadd_sat_vec.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/vec_uaddo.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+265-265)
- (modified) llvm/test/CodeGen/AArch64/vector-fcopysign.ll (+23-23)
- (modified) llvm/test/CodeGen/AArch64/vector-gep.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/vselect-constants.ll (+2-3)
- (modified) llvm/test/CodeGen/AArch64/zext-to-tbl.ll (+25-25)
- (modified) llvm/test/tools/llvm-mca/AArch64/Cortex/A510-neon-instructions.s (+171-171)
- (modified) llvm/test/tools/llvm-mca/AArch64/Cortex/A510-sve-instructions.s (+818-818)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 68343674bc819e..2ad3359657ce8d 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -254,7 +254,7 @@ def : InstRW<[WriteIS], (instrs RBITWr, RBITXr)>;
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
-def : InstRW<[CortexA510Write<3, CortexA510UnitPAC>], (instregex "^AUT", "^PAC")>;
+def : InstRW<[CortexA510Write<5, CortexA510UnitPAC>], (instregex "^AUT", "^PAC")>;
// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
@@ -401,30 +401,30 @@ def : InstRW<[CortexA510WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
// ASIMD absolute diff accum
-def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "[SU]ABAL?v")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU>], (instregex "[SU]ABAL?v")>;
// ASIMD absolute diff long
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDLv")>;
// ASIMD arith #1
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(ADD|SUB|NEG)v(1i64|2i32|4i16|8i8)",
- "[SU]R?HADDv(2i32|4i16|8i8)", "[SU]HSUBv(2i32|4i16|8i8)")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "(ADD|SUB|NEG)v(2i64|4i32|8i16|16i8)",
- "[SU]R?HADDv(8i16|4i32|16i8)", "[SU]HSUBv(8i16|4i32|16i8)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(ADD|SUB|NEG)v",
+ "[SU]R?HADDv", "[SU]HSUBv")>;
// ASIMD arith #2
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
"[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$",
- "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$",
"ADDPv(2i32|4i16|8i8)$")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
"[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$",
- "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$",
"ADDPv(16i8|2i64|4i32|8i16)$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$")>;
// ASIMD arith #3
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "SADDLv", "UADDLv", "SADDWv",
- "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "SADDLv", "UADDLv", "SADDWv",
+ "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ADDHNv", "SUBHNv")>;
// ASIMD arith #5
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "RADDHNv", "RSUBHNv")>;
+def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "RADDHNv", "RSUBHNv")>;
// ASIMD arith, reduce
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ADDVv", "SADDLVv", "UADDLVv")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "ADDVv")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "SADDLVv", "UADDLVv")>;
// ASIMD compare #1
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>;
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>;
@@ -437,10 +437,10 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(AND|EOR|NOT|
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(AND|EOR|NOT|ORN)v16i8",
"(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>;
// ASIMD max/min, basic
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(16i8|4i132|8i16)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(16i8|4i132|8i16)")>;
// SIMD max/min, reduce
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU](MAX|MIN)Vv")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU](MAX|MIN)Vv")>;
// ASIMD multiply, by element
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
"SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
@@ -467,12 +467,12 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]MULLv", "
// ASIMD polynomial (8x8) multiply long
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs PMULLv8i8, PMULLv16i8)>;
// ASIMD pairwise add and accumulate
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]ADALPv")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]ADALPv")>;
// ASIMD shift accumulate
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
// ASIMD shift accumulate #2
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]RSRA[vd]")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "[SU]RSRA[vd]")>;
// ASIMD shift by immed
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "SHLd$", "SHLv",
"SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>;
@@ -504,7 +504,7 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]QRSHLv(2i
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
// Crypto polynomial (64x64) multiply long
-def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instrs PMULLv1i64, PMULLv2i64)>;
+def : InstRW<[CortexA510MCWrite<4, 0, CortexA510UnitVMC>], (instrs PMULLv1i64, PMULLv2i64)>;
// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
@@ -512,25 +512,26 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SHA1(H|SU0|S
// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
-def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
+def : InstRW<[CortexA510MCWrite<4, 0, CortexA510UnitVMC>], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
// Crypto SHA256 schedule acceleration ops
-def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SHA256SU[01]")>;
+def : InstRW<[CortexA510MCWrite<4, 0, CortexA510UnitVMC>], (instregex "^SHA256SU[01]")>;
// Crypto SHA512 hash acceleration ops
-def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SHA512(H|H2|SU0|SU1)")>;
+def : InstRW<[CortexA510MCWrite<9, 0, CortexA510UnitVMC>], (instregex "^SHA512(H|H2|SU0|SU1)")>;
// Crypto SHA3 ops
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instrs BCAX, EOR3, XAR)>;
-def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instrs RAX1)>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instrs BCAX, EOR3)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs XAR)>;
+def : InstRW<[CortexA510MCWrite<9, 0, CortexA510UnitVMC>], (instrs RAX1)>;
// Crypto SM3 ops
-def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
+def : InstRW<[CortexA510MCWrite<9, 0, CortexA510UnitVMC>], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
"^SM3TT[12][AB]$")>;
// Crypto SM4 ops
-def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instrs SM4E, SM4ENCKEY)>;
+def : InstRW<[CortexA510MCWrite<9, 0, CortexA510UnitVMC>], (instrs SM4E, SM4ENCKEY)>;
// CRC
// -----------------------------------------------------------------------------
@@ -540,25 +541,25 @@ def : InstRW<[CortexA510MCWrite<2, 0, CortexA510UnitMAC>], (instregex "^CRC32")>
// SVE Predicate instructions
// Loop control, based on predicate
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKA_PPmP, BRKA_PPzP,
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instrs BRKA_PPmP, BRKA_PPzP,
BRKB_PPmP, BRKB_PPzP)>;
// Loop control, based on predicate and flag setting
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
// Loop control, propagating
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
// Loop control, propagating and flag setting
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKNS_PPzP)>;
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKPAS_PPzPP, BRKPBS_PPzPP)>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instrs BRKNS_PPzP)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instrs BRKPAS_PPzPP, BRKPBS_PPzPP)>;
// Loop control, based on GPR
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>],
(instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
// Loop terminate
def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
@@ -569,20 +570,20 @@ def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs ADDPL_XXI, ADDVL_X
def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
(instregex "^CNT[BHWD]_XPiI")>;
-def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
+def : InstRW<[CortexA510Write<3, CortexA510UnitALU>],
(instregex "^(INC|DEC)[BHWD]_XPiI")>;
-def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
+def : InstRW<[CortexA510Write<4, CortexA510UnitALU>],
(instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I")>;
// Predicate counting scalar, active predicate
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
(instregex "^CNTP_XPP_[BHSD]")>;
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
(instregex "^(DEC|INC)P_XP_[BHSD]")>;
-def : InstRW<[CortexA510Write<8, CortexA510UnitVALU0>],
+def : InstRW<[CortexA510Write<9, CortexA510UnitVALU0>],
(instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
"^(UQDEC|UQINC)P_WP_[BHSD]",
"^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]")>;
@@ -593,39 +594,39 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
(instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
// Predicate logical
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>],
(instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
// Predicate logical, flag setting
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>],
(instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
// Predicate reverse
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]")>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]")>;
// Predicate select
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs SEL_PPPP)>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instrs SEL_PPPP)>;
// Predicate set
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
// Predicate set/initialize, set flags
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]")>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]")>;
// Predicate find first/next
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
// Predicate test
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PTEST_PP)>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instrs PTEST_PP)>;
// Predicate transpose
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]")>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]")>;
// Predicate unpack and widen
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
// Predicate zip/unzip
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]")>;
+def : InstRW<[CortexA510Write<2, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]")>;
// SVE integer instructions
@@ -634,10 +635,10 @@ def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[1
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_(ZPmZ|ZPZZ)_[BHSD]")>;
// Arithmetic, absolute diff accum
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
+def : InstRW<[CortexA510MCWrite<6, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
// Arithmetic, absolute diff accum long
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
+def : InstRW<[CortexA510MCWrite<6, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
// Arithmetic, absolute diff long
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
@@ -651,20 +652,22 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
"^(ADD|SUB|SUBR)_ZI_[BHSD]",
"^ADR_[SU]XTW_ZZZ_D_[0123]",
"^ADR_LSL_ZZZ_[SD]_[0123]",
- "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
"^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
- "^SQ(ABS|NEG)_ZPmZ_[BHSD]",
+ (instregex "^SQ(ABS|NEG)_ZPmZ_[BHSD]",
"^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]",
"^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
"^[SU]Q(ADD|SUB)_ZI_[BHSD]",
"^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
"^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
+def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>],
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]")>;
// Arithmetic, large integer
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
@@ -735,14 +738,14 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|B
// Count/reverse bits
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]")>;
def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S")>;
def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D")>;
// Broadcast logical bitmask immediate to vector
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs DUPM_ZI)>;
// Compare and set flags
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
(instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
"^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
@@ -939,12 +942,14 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ
// Multiply/multiply long, (8x8) polynomial
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B")>;
-def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]")>;
+def : InstRW<[CortexA510Write<9, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]")>;
// Predicate counting vector
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
+ (instregex "^(DEC|INC)[HWD]_ZPiI")>;
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI")>;
+ (instregex "^(SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI")>;
// Reciprocal estimate
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
@@ -965,7 +970,7 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MA
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]")>;
// Reverse, vector
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]",
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]",
"^REVB_ZPmZ_[HSD]",
"^REVH_ZPmZ_[SD]",
"^REVW_ZPmZ_D")>;
@@ -980,13 +985,13 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[B
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]")>;
// Transpose, vector form
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
// Unpack and extend
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
// Zip/unzip
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
// SVE floating-point instructions
// -----------------------------------------------------------------------------
@@ -1142,7 +1147,7 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[H
// Floating point trigonometric, miscellaneous
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]")>;
// SVE BFloat16 (BF16) instructions
@@ -1251,12 +1256,12 @@ def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>],
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/87293
More information about the llvm-commits
mailing list