[llvm] r209576 - AArch64/ARM64: remove AArch64 from tree prior to renaming ARM64.
Rafael Espíndola
rafael.espindola at gmail.com
Sat May 24 15:39:52 PDT 2014
Congratulations!
355 files changed, 73 insertions(+), 67373 deletions(-)
On 24 May 2014 08:42, Tim Northover <tnorthover at apple.com> wrote:
> Author: tnorthover
> Date: Sat May 24 07:42:26 2014
> New Revision: 209576
>
> URL: http://llvm.org/viewvc/llvm-project?rev=209576&view=rev
> Log:
> AArch64/ARM64: remove AArch64 from tree prior to renaming ARM64.
>
> I'm doing this in two phases for a better "git blame" record. This
> commit removes the previous AArch64 backend and redirects all
> functionality to ARM64. It also deduplicates test-lines and removes
> orphaned AArch64 tests.
>
> The next step will be "git mv ARM64 AArch64" and rewire most of the
> tests.
>
> Hopefully LLVM is still functional, though it would be even better if
> no-one ever had to care because the rename happens straight
> afterwards.
>
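> For the record, the two phases correspond to something like this (a
> sketch only; the real commits also rewire the configure/CMake plumbing
> and the tests listed below):
>
>   # Phase 1 (this commit): remove the old backend outright.
>   git rm -r lib/Target/AArch64 include/llvm/IR/IntrinsicsAArch64.td
>
>   # Phase 2 (the follow-up): move ARM64 into the vacated name.
>   git mv lib/Target/ARM64 lib/Target/AArch64
>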
> Removed:
> llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
> llvm/trunk/lib/Target/AArch64/AArch64.h
> llvm/trunk/lib/Target/AArch64/AArch64.td
> llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
> llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h
> llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp
> llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td
> llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
> llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
> llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
> llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
> llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> llvm/trunk/lib/Target/AArch64/AArch64InstrNEON.td
> llvm/trunk/lib/Target/AArch64/AArch64MCInstLower.cpp
> llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.td
> llvm/trunk/lib/Target/AArch64/AArch64Schedule.td
> llvm/trunk/lib/Target/AArch64/AArch64ScheduleA53.td
> llvm/trunk/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64SelectionDAGInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
> llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
> llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetObjectFile.cpp
> llvm/trunk/lib/Target/AArch64/AArch64TargetObjectFile.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
> llvm/trunk/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
> llvm/trunk/lib/Target/AArch64/AsmParser/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/AsmParser/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/AsmParser/Makefile
> llvm/trunk/lib/Target/AArch64/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
> llvm/trunk/lib/Target/AArch64/Disassembler/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/Disassembler/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/Disassembler/Makefile
> llvm/trunk/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
> llvm/trunk/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
> llvm/trunk/lib/Target/AArch64/InstPrinter/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/InstPrinter/Makefile
> llvm/trunk/lib/Target/AArch64/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/Makefile
> llvm/trunk/lib/Target/AArch64/Makefile
> llvm/trunk/lib/Target/AArch64/README.txt
> llvm/trunk/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
> llvm/trunk/lib/Target/AArch64/TargetInfo/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/TargetInfo/Makefile
> llvm/trunk/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
> llvm/trunk/lib/Target/AArch64/Utils/AArch64BaseInfo.h
> llvm/trunk/lib/Target/AArch64/Utils/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/Utils/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/Utils/Makefile
> llvm/trunk/test/CodeGen/AArch64/andCmpBrToTBZ.ll
> llvm/trunk/test/CodeGen/AArch64/concatvector-bugs.ll
> llvm/trunk/test/CodeGen/AArch64/fp128.ll
> llvm/trunk/test/CodeGen/AArch64/global_merge_1.ll
> llvm/trunk/test/CodeGen/AArch64/i128-shift.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-modifiers.ll
> llvm/trunk/test/CodeGen/AArch64/large-frame.ll
> llvm/trunk/test/CodeGen/AArch64/lit.local.cfg
> llvm/trunk/test/CodeGen/AArch64/literal_pools_int.ll
> llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll
> llvm/trunk/test/CodeGen/AArch64/named-reg-alloc.ll
> llvm/trunk/test/CodeGen/AArch64/named-reg-notareg.ll
> llvm/trunk/test/CodeGen/AArch64/neon-2velem-high.ll
> llvm/trunk/test/CodeGen/AArch64/neon-2velem.ll
> llvm/trunk/test/CodeGen/AArch64/neon-3vdiff.ll
> llvm/trunk/test/CodeGen/AArch64/neon-aba-abd.ll
> llvm/trunk/test/CodeGen/AArch64/neon-across.ll
> llvm/trunk/test/CodeGen/AArch64/neon-add-pairwise.ll
> llvm/trunk/test/CodeGen/AArch64/neon-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-bsl.ll
> llvm/trunk/test/CodeGen/AArch64/neon-copy.ll
> llvm/trunk/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
> llvm/trunk/test/CodeGen/AArch64/neon-crypto.ll
> llvm/trunk/test/CodeGen/AArch64/neon-facge-facgt.ll
> llvm/trunk/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
> llvm/trunk/test/CodeGen/AArch64/neon-halving-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-load-store-v1i32.ll
> llvm/trunk/test/CodeGen/AArch64/neon-max-min-pairwise.ll
> llvm/trunk/test/CodeGen/AArch64/neon-max-min.ll
> llvm/trunk/test/CodeGen/AArch64/neon-misc-scalar.ll
> llvm/trunk/test/CodeGen/AArch64/neon-misc.ll
> llvm/trunk/test/CodeGen/AArch64/neon-mul-div.ll
> llvm/trunk/test/CodeGen/AArch64/neon-rounding-halving-add.ll
> llvm/trunk/test/CodeGen/AArch64/neon-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-saturating-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-saturating-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-abs.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-compare.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-cvt.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-ext.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll
> llvm/trunk/test/CodeGen/AArch64/neon-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-one.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-tbl.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-vget.ll
> llvm/trunk/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
> llvm/trunk/test/CodeGen/AArch64/neon-v1i1-setcc.ll
> llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll
> llvm/trunk/test/CodeGen/AArch64/regress-wzr-allocatable.ll
> llvm/trunk/test/CodeGen/AArch64/sext_inreg.ll
> llvm/trunk/test/CodeGen/AArch64/stackpointer.ll
> llvm/trunk/test/CodeGen/AArch64/tls-dynamic-together.ll
> llvm/trunk/test/CodeGen/AArch64/tls-dynamics.ll
> llvm/trunk/test/CodeGen/AArch64/tls-execs.ll
> llvm/trunk/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll
> llvm/trunk/test/CodeGen/AArch64/variadic.ll
> llvm/trunk/test/DebugInfo/AArch64/cfi-frame.ll
> llvm/trunk/test/DebugInfo/AArch64/variable-loc.ll
> llvm/trunk/test/MC/AArch64/elf-reloc-addend.s
> Modified:
> llvm/trunk/CMakeLists.txt
> llvm/trunk/autoconf/configure.ac
> llvm/trunk/configure
> llvm/trunk/include/llvm/IR/Intrinsics.td
> llvm/trunk/lib/Target/ARM64/ARM64AsmPrinter.cpp
> llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.cpp
> llvm/trunk/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
> llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp
> llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp
> llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h
> llvm/trunk/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp
> llvm/trunk/lib/Target/LLVMBuild.txt
> llvm/trunk/test/CodeGen/AArch64/128bit_load_store.ll
> llvm/trunk/test/CodeGen/AArch64/adc.ll
> llvm/trunk/test/CodeGen/AArch64/addsub-shifted.ll
> llvm/trunk/test/CodeGen/AArch64/addsub.ll
> llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll
> llvm/trunk/test/CodeGen/AArch64/alloca.ll
> llvm/trunk/test/CodeGen/AArch64/analyze-branch.ll
> llvm/trunk/test/CodeGen/AArch64/assertion-rc-mismatch.ll
> llvm/trunk/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
> llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
> llvm/trunk/test/CodeGen/AArch64/basic-pic.ll
> llvm/trunk/test/CodeGen/AArch64/bitfield-insert-0.ll
> llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll
> llvm/trunk/test/CodeGen/AArch64/bitfield.ll
> llvm/trunk/test/CodeGen/AArch64/blockaddress.ll
> llvm/trunk/test/CodeGen/AArch64/bool-loads.ll
> llvm/trunk/test/CodeGen/AArch64/breg.ll
> llvm/trunk/test/CodeGen/AArch64/callee-save.ll
> llvm/trunk/test/CodeGen/AArch64/code-model-large-abs.ll
> llvm/trunk/test/CodeGen/AArch64/compare-branch.ll
> llvm/trunk/test/CodeGen/AArch64/complex-copy-noneon.ll
> llvm/trunk/test/CodeGen/AArch64/cond-sel.ll
> llvm/trunk/test/CodeGen/AArch64/cpus.ll
> llvm/trunk/test/CodeGen/AArch64/directcond.ll
> llvm/trunk/test/CodeGen/AArch64/dp-3source.ll
> llvm/trunk/test/CodeGen/AArch64/dp1.ll
> llvm/trunk/test/CodeGen/AArch64/dp2.ll
> llvm/trunk/test/CodeGen/AArch64/eliminate-trunc.ll
> llvm/trunk/test/CodeGen/AArch64/extern-weak.ll
> llvm/trunk/test/CodeGen/AArch64/extract.ll
> llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll
> llvm/trunk/test/CodeGen/AArch64/fastcc.ll
> llvm/trunk/test/CodeGen/AArch64/fcmp.ll
> llvm/trunk/test/CodeGen/AArch64/fcvt-fixed.ll
> llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll
> llvm/trunk/test/CodeGen/AArch64/flags-multiuse.ll
> llvm/trunk/test/CodeGen/AArch64/floatdp_1source.ll
> llvm/trunk/test/CodeGen/AArch64/floatdp_2source.ll
> llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll
> llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll
> llvm/trunk/test/CodeGen/AArch64/fp128-folding.ll
> llvm/trunk/test/CodeGen/AArch64/fpimm.ll
> llvm/trunk/test/CodeGen/AArch64/frameaddr.ll
> llvm/trunk/test/CodeGen/AArch64/free-zext.ll
> llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll
> llvm/trunk/test/CodeGen/AArch64/func-calls.ll
> llvm/trunk/test/CodeGen/AArch64/global-alignment.ll
> llvm/trunk/test/CodeGen/AArch64/got-abuse.ll
> llvm/trunk/test/CodeGen/AArch64/i128-align.ll
> llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll
> llvm/trunk/test/CodeGen/AArch64/init-array.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
> llvm/trunk/test/CodeGen/AArch64/jump-table.ll
> llvm/trunk/test/CodeGen/AArch64/large-consts.ll
> llvm/trunk/test/CodeGen/AArch64/ldst-regoffset.ll
> llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll
> llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll
> llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll
> llvm/trunk/test/CodeGen/AArch64/local_vars.ll
> llvm/trunk/test/CodeGen/AArch64/logical-imm.ll
> llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll
> llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll
> llvm/trunk/test/CodeGen/AArch64/movw-consts.ll
> llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll
> llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll
> llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll
> llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll
> llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll
> llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll
> llvm/trunk/test/CodeGen/AArch64/neon-extract.ll
> llvm/trunk/test/CodeGen/AArch64/neon-fma.ll
> llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll
> llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll
> llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll
> llvm/trunk/test/CodeGen/AArch64/neon-mov.ll
> llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll
> llvm/trunk/test/CodeGen/AArch64/neon-perm.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll
> llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll
> llvm/trunk/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
> llvm/trunk/test/CodeGen/AArch64/pic-eh-stubs.ll
> llvm/trunk/test/CodeGen/AArch64/ragreedy-csr.ll
> llvm/trunk/test/CodeGen/AArch64/regress-bitcast-formals.ll
> llvm/trunk/test/CodeGen/AArch64/regress-f128csel-flags.ll
> llvm/trunk/test/CodeGen/AArch64/regress-fp128-livein.ll
> llvm/trunk/test/CodeGen/AArch64/regress-tail-livereg.ll
> llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
> llvm/trunk/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
> llvm/trunk/test/CodeGen/AArch64/returnaddr.ll
> llvm/trunk/test/CodeGen/AArch64/setcc-takes-i32.ll
> llvm/trunk/test/CodeGen/AArch64/sibling-call.ll
> llvm/trunk/test/CodeGen/AArch64/sincos-expansion.ll
> llvm/trunk/test/CodeGen/AArch64/sincospow-vector-expansion.ll
> llvm/trunk/test/CodeGen/AArch64/tail-call.ll
> llvm/trunk/test/CodeGen/AArch64/tst-br.ll
> llvm/trunk/test/CodeGen/AArch64/zero-reg.ll
> llvm/trunk/test/DebugInfo/AArch64/lit.local.cfg
> llvm/trunk/test/MC/AArch64/adrp-relocation.s
> llvm/trunk/test/MC/AArch64/basic-a64-diagnostics.s
> llvm/trunk/test/MC/AArch64/basic-a64-instructions.s
> llvm/trunk/test/MC/AArch64/basic-pic.s
> llvm/trunk/test/MC/AArch64/elf-extern.s
> llvm/trunk/test/MC/AArch64/elf-objdump.s
> llvm/trunk/test/MC/AArch64/elf-reloc-addsubimm.s
> llvm/trunk/test/MC/AArch64/elf-reloc-ldrlit.s
> llvm/trunk/test/MC/AArch64/elf-reloc-ldstunsimm.s
> llvm/trunk/test/MC/AArch64/elf-reloc-movw.s
> llvm/trunk/test/MC/AArch64/elf-reloc-pcreladdressing.s
> llvm/trunk/test/MC/AArch64/elf-reloc-tstb.s
> llvm/trunk/test/MC/AArch64/elf-reloc-uncondbrimm.s
> llvm/trunk/test/MC/AArch64/gicv3-regs-diagnostics.s
> llvm/trunk/test/MC/AArch64/gicv3-regs.s
> llvm/trunk/test/MC/AArch64/inline-asm-modifiers.s
> llvm/trunk/test/MC/AArch64/jump-table.s
> llvm/trunk/test/MC/AArch64/lit.local.cfg
> llvm/trunk/test/MC/AArch64/mapping-across-sections.s
> llvm/trunk/test/MC/AArch64/mapping-within-section.s
> llvm/trunk/test/MC/AArch64/neon-2velem.s
> llvm/trunk/test/MC/AArch64/neon-3vdiff.s
> llvm/trunk/test/MC/AArch64/neon-aba-abd.s
> llvm/trunk/test/MC/AArch64/neon-across.s
> llvm/trunk/test/MC/AArch64/neon-add-pairwise.s
> llvm/trunk/test/MC/AArch64/neon-add-sub-instructions.s
> llvm/trunk/test/MC/AArch64/neon-bitwise-instructions.s
> llvm/trunk/test/MC/AArch64/neon-compare-instructions.s
> llvm/trunk/test/MC/AArch64/neon-crypto.s
> llvm/trunk/test/MC/AArch64/neon-diagnostics.s
> llvm/trunk/test/MC/AArch64/neon-extract.s
> llvm/trunk/test/MC/AArch64/neon-facge-facgt.s
> llvm/trunk/test/MC/AArch64/neon-frsqrt-frecp.s
> llvm/trunk/test/MC/AArch64/neon-halving-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-max-min-pairwise.s
> llvm/trunk/test/MC/AArch64/neon-max-min.s
> llvm/trunk/test/MC/AArch64/neon-mla-mls-instructions.s
> llvm/trunk/test/MC/AArch64/neon-mov.s
> llvm/trunk/test/MC/AArch64/neon-mul-div-instructions.s
> llvm/trunk/test/MC/AArch64/neon-perm.s
> llvm/trunk/test/MC/AArch64/neon-rounding-halving-add.s
> llvm/trunk/test/MC/AArch64/neon-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-saturating-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-saturating-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-saturating-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-abs.s
> llvm/trunk/test/MC/AArch64/neon-scalar-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-mla.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-mul.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s
> llvm/trunk/test/MC/AArch64/neon-scalar-compare.s
> llvm/trunk/test/MC/AArch64/neon-scalar-cvt.s
> llvm/trunk/test/MC/AArch64/neon-scalar-dup.s
> llvm/trunk/test/MC/AArch64/neon-scalar-extract-narrow.s
> llvm/trunk/test/MC/AArch64/neon-scalar-fp-compare.s
> llvm/trunk/test/MC/AArch64/neon-scalar-mul.s
> llvm/trunk/test/MC/AArch64/neon-scalar-neg.s
> llvm/trunk/test/MC/AArch64/neon-scalar-recip.s
> llvm/trunk/test/MC/AArch64/neon-scalar-reduce-pairwise.s
> llvm/trunk/test/MC/AArch64/neon-scalar-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-saturating-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-saturating-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-shift-imm.s
> llvm/trunk/test/MC/AArch64/neon-scalar-shift.s
> llvm/trunk/test/MC/AArch64/neon-shift-left-long.s
> llvm/trunk/test/MC/AArch64/neon-shift.s
> llvm/trunk/test/MC/AArch64/neon-simd-copy.s
> llvm/trunk/test/MC/AArch64/neon-simd-ldst-multi-elem.s
> llvm/trunk/test/MC/AArch64/neon-simd-ldst-one-elem.s
> llvm/trunk/test/MC/AArch64/neon-simd-misc.s
> llvm/trunk/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s
> llvm/trunk/test/MC/AArch64/neon-simd-shift.s
> llvm/trunk/test/MC/AArch64/neon-sxtl.s
> llvm/trunk/test/MC/AArch64/neon-tbl.s
> llvm/trunk/test/MC/AArch64/neon-uxtl.s
> llvm/trunk/test/MC/AArch64/noneon-diagnostics.s
> llvm/trunk/test/MC/AArch64/optional-hash.s
> llvm/trunk/test/MC/AArch64/tls-relocs.s
> llvm/trunk/test/MC/AArch64/trace-regs-diagnostics.s
> llvm/trunk/test/MC/AArch64/trace-regs.s
> llvm/trunk/test/MC/Disassembler/AArch64/lit.local.cfg
> llvm/trunk/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
>
> Modified: llvm/trunk/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/CMakeLists.txt?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/CMakeLists.txt (original)
> +++ llvm/trunk/CMakeLists.txt Sat May 24 07:42:26 2014
> @@ -127,7 +127,6 @@ set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BIN
> set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
>
> set(LLVM_ALL_TARGETS
> - AArch64
> ARM64
> ARM
> CppBackend
> @@ -144,7 +143,7 @@ set(LLVM_ALL_TARGETS
> )
>
> # List of targets with JIT support:
> -set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM64 ARM Mips SystemZ)
> +set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM64 ARM Mips SystemZ)
>
> set(LLVM_TARGETS_TO_BUILD "all"
> CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
>
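> With AArch64 gone from LLVM_ALL_TARGETS, a build that names its targets
> explicitly should now spell this backend ARM64. A minimal sketch using
> the LLVM_TARGETS_TO_BUILD cache variable from the hunk above:
>
>   cmake -DLLVM_TARGETS_TO_BUILD="ARM64;X86" ../llvm
>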
> Modified: llvm/trunk/autoconf/configure.ac
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/autoconf/configure.ac (original)
> +++ llvm/trunk/autoconf/configure.ac Sat May 24 07:42:26 2014
> @@ -421,7 +421,7 @@ AC_CACHE_CHECK([target architecture],[ll
> powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
> arm64*-*) llvm_cv_target_arch="ARM64" ;;
> arm*-*) llvm_cv_target_arch="ARM" ;;
> - aarch64*-*) llvm_cv_target_arch="AArch64" ;;
> + aarch64*-*) llvm_cv_target_arch="ARM64" ;;
> mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
> mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
> xcore-*) llvm_cv_target_arch="XCore" ;;
> @@ -457,7 +457,7 @@ case $host in
> powerpc*-*) host_arch="PowerPC" ;;
> arm64*-*) host_arch="ARM64" ;;
> arm*-*) host_arch="ARM" ;;
> - aarch64*-*) host_arch="AArch64" ;;
> + aarch64*-*) host_arch="ARM64" ;;
> mips-* | mips64-*) host_arch="Mips" ;;
> mipsel-* | mips64el-*) host_arch="Mips" ;;
> xcore-*) host_arch="XCore" ;;
> @@ -786,7 +786,6 @@ else
> PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
> x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
> ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
> - AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
> Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
> XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
> MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
> @@ -797,7 +796,7 @@ else
> esac
> fi
>
> -TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
> +TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
> AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT)
>
> dnl Allow enablement of building and installing docs
> @@ -950,7 +949,7 @@ if test "$llvm_cv_enable_crash_overrides
> fi
>
> dnl List all possible targets
> -ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> +ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> AC_SUBST(ALL_TARGETS,$ALL_TARGETS)
>
> dnl Allow specific targets to be specified for building (or not)
> @@ -971,7 +970,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> @@ -990,7 +989,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
>
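> Note that after this hunk both spellings of the --enable-targets value
> select the same backend, so existing build invocations keep working; a
> sketch:
>
>   ./configure --enable-targets=aarch64   # now maps to ARM64
>   ./configure --enable-targets=arm64     # unchanged
>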
> Modified: llvm/trunk/configure
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/configure?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/configure (original)
> +++ llvm/trunk/configure Sat May 24 07:42:26 2014
> @@ -4153,7 +4153,7 @@ else
> powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
> arm64*-*) llvm_cv_target_arch="ARM64" ;;
> arm*-*) llvm_cv_target_arch="ARM" ;;
> - aarch64*-*) llvm_cv_target_arch="AArch64" ;;
> + aarch64*-*) llvm_cv_target_arch="ARM64" ;;
> mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
> mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
> xcore-*) llvm_cv_target_arch="XCore" ;;
> @@ -4190,7 +4190,7 @@ case $host in
> powerpc*-*) host_arch="PowerPC" ;;
> arm64*-*) host_arch="ARM64" ;;
> arm*-*) host_arch="ARM" ;;
> - aarch64*-*) host_arch="AArch64" ;;
> + aarch64*-*) host_arch="ARM64" ;;
> mips-* | mips64-*) host_arch="Mips" ;;
> mipsel-* | mips64el-*) host_arch="Mips" ;;
> xcore-*) host_arch="XCore" ;;
> @@ -5103,8 +5103,6 @@ else
> ;;
> ARM) TARGET_HAS_JIT=1
> ;;
> - AArch64) TARGET_HAS_JIT=0
> - ;;
> Mips) TARGET_HAS_JIT=1
> ;;
> XCore) TARGET_HAS_JIT=0
> @@ -5122,7 +5120,7 @@ else
> esac
> fi
>
> -TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
> +TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
> TARGETS_WITH_JIT=$TARGETS_WITH_JIT
>
>
> @@ -5359,7 +5357,7 @@ _ACEOF
>
> fi
>
> -ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> +ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> ALL_TARGETS=$ALL_TARGETS
>
>
> @@ -5382,7 +5380,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> @@ -5401,7 +5399,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
>
> Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
> +++ llvm/trunk/include/llvm/IR/Intrinsics.td Sat May 24 07:42:26 2014
> @@ -534,7 +534,6 @@ include "llvm/IR/IntrinsicsPowerPC.td"
> include "llvm/IR/IntrinsicsX86.td"
> include "llvm/IR/IntrinsicsARM.td"
> include "llvm/IR/IntrinsicsARM64.td"
> -include "llvm/IR/IntrinsicsAArch64.td"
> include "llvm/IR/IntrinsicsXCore.td"
> include "llvm/IR/IntrinsicsHexagon.td"
> include "llvm/IR/IntrinsicsNVVM.td"
>
> Removed: llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td (removed)
> @@ -1,407 +0,0 @@
> -//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines all of the AArch64-specific intrinsics.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -//===----------------------------------------------------------------------===//
> -// Advanced SIMD (NEON)
> -
> -let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
> -
> -// Vector Absolute Compare (Floating Point)
> -def int_aarch64_neon_vacgeq :
> - Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vacgtq :
> - Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
> -
> -// Vector saturating accumulate
> -def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
> -def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;
> -
> -// Vector Bitwise reverse
> -def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;
> -
> -// Vector extract and narrow
> -def int_aarch64_neon_xtn :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Vector floating-point convert
> -def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
> -def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
> -def int_aarch64_neon_vcvtxn :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vcvtzs :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vcvtzu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Vector maxNum (Floating Point)
> -def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
> -
> -// Vector minNum (Floating Point)
> -def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
> -
> -// Vector Pairwise maxNum (Floating Point)
> -def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
> -
> -// Vector Pairwise minNum (Floating Point)
> -def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
> -
> -// Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
> -def int_aarch64_neon_vmulx :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;
> -
> -class Neon_N2V_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
> - [IntrNoMem]>;
> -class Neon_N3V_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
> - [IntrNoMem]>;
> -class Neon_N2V_Narrow_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty],
> - [LLVMExtendedType<0>, llvm_i32_ty],
> - [IntrNoMem]>;
> -
> -// Vector rounding shift right by immediate (Signed)
> -def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
> -def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
> -def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;
> -
> -def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
> -def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;
> -
> -def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
> -
> -// Vector across
> -class Neon_Across_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_smaxv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_umaxv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_sminv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_uminv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_vaddv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_vmaxv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vminv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vmaxnmv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vminnmv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -
> -// Vector Table Lookup.
> -def int_aarch64_neon_vtbl1 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbl2 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbl3 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
> - LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbl4 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
> - llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Vector Table Extension.
> -// Some elements of the destination vector may not be updated, so the original
> -// value of that vector is passed as the first argument. The next 1-4
> -// arguments after that are the table.
> -def int_aarch64_neon_vtbx1 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbx2 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
> - LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbx3 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
> - llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbx4 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
> - llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Vector Load/store
> -def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
> - [llvm_ptr_ty, llvm_i32_ty],
> - [IntrReadArgMem]>;
> -def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
> - LLVMMatchType<0>],
> - [llvm_ptr_ty, llvm_i32_ty],
> - [IntrReadArgMem]>;
> -def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
> - LLVMMatchType<0>, LLVMMatchType<0>],
> - [llvm_ptr_ty, llvm_i32_ty],
> - [IntrReadArgMem]>;
> -
> -def int_aarch64_neon_vst1x2 : Intrinsic<[],
> - [llvm_ptr_ty, llvm_anyvector_ty,
> - LLVMMatchType<0>, llvm_i32_ty],
> - [IntrReadWriteArgMem]>;
> -def int_aarch64_neon_vst1x3 : Intrinsic<[],
> - [llvm_ptr_ty, llvm_anyvector_ty,
> - LLVMMatchType<0>, LLVMMatchType<0>,
> - llvm_i32_ty], [IntrReadWriteArgMem]>;
> -def int_aarch64_neon_vst1x4 : Intrinsic<[],
> - [llvm_ptr_ty, llvm_anyvector_ty,
> - LLVMMatchType<0>, LLVMMatchType<0>,
> - LLVMMatchType<0>, llvm_i32_ty],
> - [IntrReadWriteArgMem]>;
> -
> -// Scalar Add
> -def int_aarch64_neon_vaddds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vadddu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -
> -// Scalar Sub
> -def int_aarch64_neon_vsubds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vsubdu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -
> -// Scalar Shift
> -// Scalar Shift Left
> -def int_aarch64_neon_vshlds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vshldu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Scalar Saturating Shift Left
> -def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
> -def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;
> -
> -// Scalar Shift Rounding Left
> -def int_aarch64_neon_vrshlds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vrshldu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Scalar Saturating Rounding Shift Left
> -def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
> -def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
> -
> -// Scalar Reduce Pairwise Add.
> -def int_aarch64_neon_vpadd :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
> -def int_aarch64_neon_vpfadd :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Reduce Pairwise Floating Point Max/Min.
> -def int_aarch64_neon_vpmax :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vpmin :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
> -def int_aarch64_neon_vpfmaxnm :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vpfminnm :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Signed Integer Convert To Floating-point
> -def int_aarch64_neon_vcvtint2fps :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Unsigned Integer Convert To Floating-point
> -def int_aarch64_neon_vcvtint2fpu :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Convert
> -def int_aarch64_neon_fcvtxn :
> - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtns :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtnu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtps :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtpu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtms :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtmu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtas :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtau :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtzs :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtzu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Estimate.
> -def int_aarch64_neon_vrecpe :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Exponent
> -def int_aarch64_neon_vrecpx :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Square Root Estimate
> -def int_aarch64_neon_vrsqrte :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Step
> -def int_aarch64_neon_vrecps :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Square Root Step
> -def int_aarch64_neon_vrsqrts :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Compare with vector operands.
> -class Neon_Cmp_Intrinsic :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
> - [IntrNoMem]>;
> -
> -// Floating-point compare with scalar operands.
> -class Neon_Float_Cmp_Intrinsic :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_anyfloat_ty],
> - [IntrNoMem]>;
> -
> -// Scalar Compare Equal
> -def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fceq : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Greater-Than or Equal
> -def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcge : Neon_Float_Cmp_Intrinsic;
> -def int_aarch64_neon_fchs : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Less-Than or Equal
> -def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fclez : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Less-Than
> -def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcltz : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Greater-Than
> -def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcgt : Neon_Float_Cmp_Intrinsic;
> -def int_aarch64_neon_fchi : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Bitwise Test Bits
> -def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;
> -
> -// Scalar Floating-point Absolute Compare Greater Than Or Equal
> -def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcage : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Floating-point Absolute Compare Greater Than
> -def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcagt : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Signed Saturating Accumulated of Unsigned Value
> -def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;
> -
> -// Scalar Unsigned Saturating Accumulated of Signed Value
> -def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
> -
> -// Scalar Absolute Value
> -def int_aarch64_neon_vabs :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Scalar Absolute Difference
> -def int_aarch64_neon_vabd :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Scalar Negate Value
> -def int_aarch64_neon_vneg :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Signed Saturating Doubling Multiply-Add Long
> -def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
> -
> -// Signed Saturating Doubling Multiply-Subtract Long
> -def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
> -
> -def int_aarch64_neon_vmull_p64 :
> - Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -class Neon_2Arg_ShiftImm_Intrinsic
> - : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -class Neon_3Arg_ShiftImm_Intrinsic
> - : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
> - [IntrNoMem]>;
> -
> -// Scalar Shift Right (Immediate)
> -def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
> -def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Shift Right and Accumulate (Immediate)
> -def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
> -def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Rounding Shift Right and Accumulate (Immediate)
> -def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
> -def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Shift Left (Immediate)
> -def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Saturating Shift Left (Immediate)
> -def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
> -def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
> -
> -// Scalar Signed Saturating Shift Left Unsigned (Immediate)
> -def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;
> -
> -// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
> -def int_aarch64_neon_vcvtfxs2fp_n :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
> -def int_aarch64_neon_vcvtfxu2fp_n :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
> -def int_aarch64_neon_vcvtfp2fxs_n :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
> -def int_aarch64_neon_vcvtfp2fxu_n :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -}
>
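> For context: a def in this file with TargetPrefix "aarch64" became an
> "llvm.aarch64.*" intrinsic in IR. A minimal sketch for the
> non-overloaded int_aarch64_neon_vmaxv above (callers now need the
> corresponding llvm.arm64.* intrinsic, where one exists):
>
>   declare float @llvm.aarch64.neon.vmaxv(<4 x float>)
>
>   define float @max_across(<4 x float> %v) {
>     %max = call float @llvm.aarch64.neon.vmaxv(<4 x float> %v)
>     ret float %max
>   }
>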
> Removed: llvm/trunk/lib/Target/AArch64/AArch64.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64.h (removed)
> @@ -1,46 +0,0 @@
> -//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the entry points for global functions defined in the LLVM
> -// AArch64 back-end.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_TARGET_AARCH64_H
> -#define LLVM_TARGET_AARCH64_H
> -
> -#include "MCTargetDesc/AArch64MCTargetDesc.h"
> -#include "llvm/Target/TargetMachine.h"
> -
> -namespace llvm {
> -
> -class AArch64AsmPrinter;
> -class FunctionPass;
> -class AArch64TargetMachine;
> -class MachineInstr;
> -class MCInst;
> -
> -FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
> - CodeGenOpt::Level OptLevel);
> -
> -FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
> -
> -FunctionPass *createAArch64BranchFixupPass();
> -
> -/// \brief Creates an AArch64-specific Target Transformation Info pass.
> -ImmutablePass *createAArch64TargetTransformInfoPass(
> - const AArch64TargetMachine *TM);
> -
> -void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
> - AArch64AsmPrinter &AP);
> -
> -
> -}
> -
> -#endif
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64.td (removed)
> @@ -1,83 +0,0 @@
> -//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This is the top level entry point for the AArch64 target.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -//===----------------------------------------------------------------------===//
> -// Target-independent interfaces
> -//===----------------------------------------------------------------------===//
> -
> -include "llvm/Target/Target.td"
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 Subtarget features.
> -//
> -
> -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
> - "Enable ARMv8 FP">;
> -
> -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
> - "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
> -
> -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
> - "Enable cryptographic instructions">;
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 Processors
> -//
> -
> -include "AArch64Schedule.td"
> -
> -class ProcNoItin<string Name, list<SubtargetFeature> Features>
> - : Processor<Name, NoItineraries, Features>;
> -
> -def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>;
> -
> -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
> - "Cortex-A53 ARM processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto]>;
> -
> -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
> - "Cortex-A57 ARM processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto]>;
> -
> -def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
> -def : Processor<"cortex-a57", NoItineraries, [ProcA57]>;
> -
> -//===----------------------------------------------------------------------===//
> -// Register File Description
> -//===----------------------------------------------------------------------===//
> -
> -include "AArch64RegisterInfo.td"
> -
> -include "AArch64CallingConv.td"
> -
> -//===----------------------------------------------------------------------===//
> -// Instruction Descriptions
> -//===----------------------------------------------------------------------===//
> -
> -include "AArch64InstrInfo.td"
> -
> -def AArch64InstrInfo : InstrInfo {
> - let noNamedPositionallyEncodedOperands = 1;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Declare the target which we are implementing
> -//===----------------------------------------------------------------------===//
> -
> -def AArch64 : Target {
> - let InstructionSet = AArch64InstrInfo;
> -}
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp (removed)
> @@ -1,303 +0,0 @@
> -//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains a printer that converts from our internal representation
> -// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64AsmPrinter.h"
> -#include "InstPrinter/AArch64InstPrinter.h"
> -#include "llvm/ADT/SmallString.h"
> -#include "llvm/CodeGen/MachineModuleInfoImpls.h"
> -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
> -#include "llvm/IR/DebugInfo.h"
> -#include "llvm/IR/Mangler.h"
> -#include "llvm/MC/MCAsmInfo.h"
> -#include "llvm/MC/MCInst.h"
> -#include "llvm/MC/MCSymbol.h"
> -#include "llvm/Support/TargetRegistry.h"
> -
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "asm-printer"
> -
> -/// Try to print a floating-point register as if it belonged to a specified
> -/// register-class. For example the inline asm operand modifier "b" requires its
> -/// argument to be printed as "bN".
> -static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
> - const TargetRegisterInfo *TRI,
> - char RegType, raw_ostream &O) {
> - if (!MO.isReg())
> - return true;
> -
> - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
> - if (AArch64::FPR8RegClass.contains(*AR)) {
> - O << RegType << TRI->getEncodingValue(MO.getReg());
> - return false;
> - }
> - }
> -
> - // The register doesn't correspond to anything floating-point like.
> - return true;
> -}
> -
> -/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
> -/// with the obvious type and an immediate 0 as either wzr or xzr.
> -static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
> - const TargetRegisterInfo *TRI,
> - const TargetRegisterClass &RegClass,
> - raw_ostream &O) {
> - char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
> -
> - if (MO.isImm() && MO.getImm() == 0) {
> - O << Prefix << "zr";
> - return false;
> - } else if (MO.isReg()) {
> - if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
> - O << (Prefix == 'x' ? "sp" : "wsp");
> - return false;
> - }
> -
> - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
> - if (RegClass.contains(*AR)) {
> - O << AArch64InstPrinter::getRegisterName(*AR);
> - return false;
> - }
> - }
> - }
> -
> - return true;
> -}
> -
> -bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
> - bool PrintImmediatePrefix,
> - StringRef Suffix, raw_ostream &O) {
> - StringRef Name;
> - StringRef Modifier;
> - switch (MO.getType()) {
> - default:
> - return true;
> - case MachineOperand::MO_GlobalAddress:
> - Name = getSymbol(MO.getGlobal())->getName();
> -
> - // Global variables may be accessed either via a GOT or in various fun and
> - // interesting TLS-model specific ways. Set the prefix modifier as
> - // appropriate here.
> - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
> - Reloc::Model RelocM = TM.getRelocationModel();
> - if (GV->isThreadLocal()) {
> - switch (TM.getTLSModel(GV)) {
> - case TLSModel::GeneralDynamic:
> - Modifier = "tlsdesc";
> - break;
> - case TLSModel::LocalDynamic:
> - Modifier = "dtprel";
> - break;
> - case TLSModel::InitialExec:
> - Modifier = "gottprel";
> - break;
> - case TLSModel::LocalExec:
> - Modifier = "tprel";
> - break;
> - }
> - } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
> - Modifier = "got";
> - }
> - }
> - break;
> - case MachineOperand::MO_BlockAddress:
> - Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
> - break;
> - case MachineOperand::MO_ConstantPoolIndex:
> - Name = GetCPISymbol(MO.getIndex())->getName();
> - break;
> - }
> -
> - // Some instructions (notably ADRP) don't take the # prefix for
> - // immediates. Only print it if asked to.
> - if (PrintImmediatePrefix)
> - O << '#';
> -
> - // Only need the joining "_" if both the prefix and the suffix are
> - // non-null. This little block simply takes care of the four possibly
> - // combinations involved there.
> - if (Modifier == "" && Suffix == "")
> - O << Name;
> - else if (Modifier == "" && Suffix != "")
> - O << ":" << Suffix << ':' << Name;
> - else if (Modifier != "" && Suffix == "")
> - O << ":" << Modifier << ':' << Name;
> - else
> - O << ":" << Modifier << '_' << Suffix << ':' << Name;
> -
> - return false;
> -}
> -
> -bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
> - unsigned AsmVariant,
> - const char *ExtraCode, raw_ostream &O) {
> - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
> -
> - if (!ExtraCode)
> - ExtraCode = "";
> -
> - switch(ExtraCode[0]) {
> - default:
> - if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
> - return false;
> - break;
> - case 'w':
> - // Output 32-bit general register operand, constant zero as wzr, or stack
> - // pointer as wsp. Ignored when used with other operand types.
> - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
> - AArch64::GPR32RegClass, O))
> - return false;
> - break;
> - case 'x':
> - // Output 64-bit general register operand, constant zero as xzr, or stack
> - // pointer as sp. Ignored when used with other operand types.
> - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
> - AArch64::GPR64RegClass, O))
> - return false;
> - break;
> - case 'H':
> - // Output higher numbered of a 64-bit general register pair
> - case 'Q':
> - // Output least significant register of a 64-bit general register pair
> - case 'R':
> - // Output most significant register of a 64-bit general register pair
> -
> - // FIXME note: these three operand modifiers will require, to some extent,
> - // adding a paired GPR64 register class. Initial investigation suggests that
> - // assertions are hit unless it has a type and is made legal for that type
> - // in ISelLowering. After that step is made, the number of modifications
> - // needed explodes (operation legality, calling conventions, stores, reg
> - // copies ...).
> - llvm_unreachable("FIXME: Unimplemented register pairs");
> - case 'b':
> - case 'h':
> - case 's':
> - case 'd':
> - case 'q':
> - if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
> - ExtraCode[0], O))
> - return false;
> - break;
> - case 'A':
> - // Output symbolic address with appropriate relocation modifier (also
> - // suitable for ADRP).
> - if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O))
> - return false;
> - break;
> - case 'L':
> - // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
> - // modifier.
> - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O))
> - return false;
> - break;
> - case 'G':
> - // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
> - // modifier (currently only for TLS local exec).
> - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O))
> - return false;
> - break;
> - case 'a':
> - return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
> - }
> -
> - // There's actually no operand modifier, which leads to a slightly eclectic
> - // set of behaviour which we have to handle here.
> - const MachineOperand &MO = MI->getOperand(OpNum);
> - switch (MO.getType()) {
> - default:
> - llvm_unreachable("Unexpected operand for inline assembly");
> - case MachineOperand::MO_Register:
> - // GCC prints the unmodified operand of a 'w' constraint as the vector
> - // register. Technically, we could allocate the argument as a VPR128, but
> - // that leads to extremely dodgy copies being generated to get the data
> - // there.
> - if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
> - O << AArch64InstPrinter::getRegisterName(MO.getReg());
> - break;
> - case MachineOperand::MO_Immediate:
> - O << '#' << MO.getImm();
> - break;
> - case MachineOperand::MO_FPImmediate:
> - assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
> - O << "#0.0";
> - break;
> - case MachineOperand::MO_BlockAddress:
> - case MachineOperand::MO_ConstantPoolIndex:
> - case MachineOperand::MO_GlobalAddress:
> - return printSymbolicAddress(MO, false, "", O);
> - }
> -
> - return false;
> -}
> -
> -bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
> - unsigned OpNum,
> - unsigned AsmVariant,
> - const char *ExtraCode,
> - raw_ostream &O) {
> - // Currently both the memory constraints (m and Q) behave the same and amount
> - // to the address as a single register. In future, we may allow "m" to provide
> - // both a base and an offset.
> - const MachineOperand &MO = MI->getOperand(OpNum);
> - assert(MO.isReg() && "unexpected inline assembly memory operand");
> - O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
> - return false;
> -}
> -
> -#include "AArch64GenMCPseudoLowering.inc"
> -
> -void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
> - // Do any auto-generated pseudo lowerings.
> - if (emitPseudoExpansionLowering(OutStreamer, MI))
> - return;
> -
> - MCInst TmpInst;
> - LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
> - EmitToStreamer(OutStreamer, TmpInst);
> -}
> -
> -void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
> - if (Subtarget->isTargetELF()) {
> - const TargetLoweringObjectFileELF &TLOFELF =
> - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
> -
> - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
> -
> - // Output stubs for external and common global variables.
> - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
> - if (!Stubs.empty()) {
> - OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
> - const DataLayout *TD = TM.getDataLayout();
> -
> - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
> - OutStreamer.EmitLabel(Stubs[i].first);
> - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
> - TD->getPointerSize(0));
> - }
> - Stubs.clear();
> - }
> - }
> -}
> -
> -bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
> - return AsmPrinter::runOnMachineFunction(MF);
> -}
> -
> -// Force static initialization.
> -extern "C" void LLVMInitializeAArch64AsmPrinter() {
> - RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
> - RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
> -}
> -
>
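As an aside for anyone reading through the removal: the 'w' and 'x' cases above implement the usual GCC-style inline-asm operand modifiers for AArch64, which the ARM64 backend is expected to keep handling identically. A minimal sketch of how they surface in user code (illustrative only, not part of this patch):

    // '%w1' prints the 32-bit name (w-register) of the operand's register,
    // '%x1' the 64-bit name (x-register); zero prints as wzr/xzr as noted above.
    int add_one(int a) {
      int r;
      __asm__("add %w0, %w1, #1" : "=r"(r) : "r"(a));
      return r;
    }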
> Removed: llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h (removed)
> @@ -1,76 +0,0 @@
> -// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines the AArch64 assembly printer class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_AARCH64ASMPRINTER_H
> -#define LLVM_AARCH64ASMPRINTER_H
> -
> -#include "AArch64.h"
> -#include "AArch64TargetMachine.h"
> -#include "llvm/CodeGen/AsmPrinter.h"
> -#include "llvm/MC/MCStreamer.h"
> -#include "llvm/Support/Compiler.h"
> -
> -namespace llvm {
> -
> -class MCOperand;
> -
> -class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
> -
> - /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
> - /// make the right decision when printing asm code for different targets.
> - const AArch64Subtarget *Subtarget;
> -
> - // emitPseudoExpansionLowering - tblgen'erated.
> - bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
> - const MachineInstr *MI);
> -
> - public:
> - explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
> - : AsmPrinter(TM, Streamer) {
> - Subtarget = &TM.getSubtarget<AArch64Subtarget>();
> - }
> -
> - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
> -
> - MCOperand lowerSymbolOperand(const MachineOperand &MO,
> - const MCSymbol *Sym) const;
> -
> - void EmitInstruction(const MachineInstr *MI) override;
> - void EmitEndOfAsmFile(Module &M) override;
> -
> - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
> - unsigned AsmVariant, const char *ExtraCode,
> - raw_ostream &O) override;
> - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
> - unsigned AsmVariant, const char *ExtraCode,
> - raw_ostream &O) override;
> -
> - /// printSymbolicAddress - Given some kind of reasonably bare symbolic
> - /// reference, print out the appropriate asm string to represent it. If
> - /// appropriate, a relocation-specifier will be produced, composed of a
> - /// general class derived from the MO parameter and an instruction-specific
> - /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
> - /// given.
> - bool printSymbolicAddress(const MachineOperand &MO,
> - bool PrintImmediatePrefix,
> - StringRef Suffix, raw_ostream &O);
> -
> - const char *getPassName() const override {
> - return "AArch64 Assembly Printer";
> - }
> -
> - bool runOnMachineFunction(MachineFunction &MF) override;
> -};
> -} // end namespace llvm
> -
> -#endif
>
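The relocation specifiers that printSymbolicAddress glues together are the standard AArch64 ELF ones. For a rough idea of the output the 'A' and 'L' modifiers were producing (assuming a global called `var`; illustrative only):

    adrp x0, var              // 'A': page of var; note no '#' prefix for ADRP
    add  x0, x0, #:lo12:var   // 'L': bits 11:0, printed as #:lo12:var
    // With a "got" modifier the combined specifier appears:
    adrp x0, :got:var
    ldr  x0, [x0, #:got_lo12:var]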
> Removed: llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp (removed)
> @@ -1,601 +0,0 @@
> -//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains a pass that fixes AArch64 branches which have ended up out
> -// of range for their immediate operands.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64InstrInfo.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/ADT/Statistic.h"
> -#include "llvm/CodeGen/MachineFunctionPass.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/Support/Debug.h"
> -#include "llvm/Support/Format.h"
> -#include "llvm/Support/raw_ostream.h"
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "aarch64-branch-fixup"
> -
> -STATISTIC(NumSplit, "Number of uncond branches inserted");
> -STATISTIC(NumCBrFixed, "Number of cond branches fixed");
> -
> -/// Return the worst case padding that could result from unknown offset bits.
> -/// This does not include alignment padding caused by known offset bits.
> -///
> -/// @param LogAlign log2(alignment)
> -/// @param KnownBits Number of known low offset bits.
> -static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
> - if (KnownBits < LogAlign)
> - return (1u << LogAlign) - (1u << KnownBits);
> - return 0;
> -}
> -
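A quick worked example, since the formula is terse: with a successor needing 8-byte alignment (LogAlign = 3) but only 4-byte instruction alignment known (KnownBits = 2), the worst case is (1 << 3) - (1 << 2) = 4 bytes of padding; once KnownBits >= LogAlign the offset is provably aligned and nothing is added. A compile-time restatement (constexpr mirror of the function above, illustrative only):

    constexpr unsigned unknownPaddingMirror(unsigned LogAlign, unsigned KnownBits) {
      return KnownBits < LogAlign ? (1u << LogAlign) - (1u << KnownBits) : 0;
    }
    static_assert(unknownPaddingMirror(3, 2) == 4, "8-byte align, 4-byte known");
    static_assert(unknownPaddingMirror(3, 3) == 0, "already provably aligned");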
> -namespace {
> - /// Due to limited PC-relative displacements, conditional branches to distant
> - /// blocks may need converting into an unconditional equivalent. For example:
> - /// tbz w1, #0, far_away
> - /// becomes
> - /// tbnz w1, #0, skip
> - /// b far_away
> - /// skip:
> - class AArch64BranchFixup : public MachineFunctionPass {
> - /// Information about the offset and size of a single basic block.
> - struct BasicBlockInfo {
> - /// Distance from the beginning of the function to the beginning of this
> - /// basic block.
> - ///
> - /// Offsets are computed assuming worst case padding before an aligned
> - /// block. This means that subtracting basic block offsets always gives a
> - /// conservative estimate of the real distance which may be smaller.
> - ///
> - /// Because worst case padding is used, the computed offset of an aligned
> - /// block may not actually be aligned.
> - unsigned Offset;
> -
> - /// Size of the basic block in bytes. If the block contains inline
> - /// assembly, this is a worst case estimate.
> - ///
> - /// The size does not include any alignment padding whether from the
> - /// beginning of the block, or from an aligned jump table at the end.
> - unsigned Size;
> -
> - /// The number of low bits in Offset that are known to be exact. The
> - /// remaining bits of Offset are an upper bound.
> - uint8_t KnownBits;
> -
> - /// When non-zero, the block contains instructions (inline asm) of unknown
> - /// size. The real size may be smaller than Size bytes by a multiple of 1
> - /// << Unalign.
> - uint8_t Unalign;
> -
> - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
> -
> - /// Compute the number of known offset bits internally to this block.
> - /// This number should be used to predict worst case padding when
> - /// splitting the block.
> - unsigned internalKnownBits() const {
> - unsigned Bits = Unalign ? Unalign : KnownBits;
> - // If the block size isn't a multiple of the known bits, assume the
> - // worst case padding.
> - if (Size & ((1u << Bits) - 1))
> - Bits = countTrailingZeros(Size);
> - return Bits;
> - }
> -
> - /// Compute the offset immediately following this block. If LogAlign is
> - /// specified, return the offset the successor block will get if it has
> - /// this alignment.
> - unsigned postOffset(unsigned LogAlign = 0) const {
> - unsigned PO = Offset + Size;
> - if (!LogAlign)
> - return PO;
> - // Add alignment padding from the terminator.
> - return PO + UnknownPadding(LogAlign, internalKnownBits());
> - }
> -
> - /// Compute the number of known low bits of postOffset. If this block
> - /// contains inline asm, the number of known bits drops to the
> - /// instruction alignment. An aligned terminator may increase the number
> - /// of known bits.
> - /// If LogAlign is given, also consider the alignment of the next block.
> - unsigned postKnownBits(unsigned LogAlign = 0) const {
> - return std::max(LogAlign, internalKnownBits());
> - }
> - };
> -
> - std::vector<BasicBlockInfo> BBInfo;
> -
> - /// One per immediate branch: the machine instruction pointer, the number of
> - /// displacement bits available (OffsetBits), and whether the branch is
> - /// conditional (IsCond).
> - struct ImmBranch {
> - MachineInstr *MI;
> - unsigned OffsetBits : 31;
> - bool IsCond : 1;
> - ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
> - : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
> - };
> -
> - /// Keep track of all the immediate branch instructions.
> - ///
> - std::vector<ImmBranch> ImmBranches;
> -
> - MachineFunction *MF;
> - const AArch64InstrInfo *TII;
> - public:
> - static char ID;
> - AArch64BranchFixup() : MachineFunctionPass(ID) {}
> -
> - bool runOnMachineFunction(MachineFunction &MF) override;
> -
> - const char *getPassName() const override {
> - return "AArch64 branch fixup pass";
> - }
> -
> - private:
> - void initializeFunctionInfo();
> - MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
> - void adjustBBOffsetsAfter(MachineBasicBlock *BB);
> - bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned OffsetBits);
> - bool fixupImmediateBr(ImmBranch &Br);
> - bool fixupConditionalBr(ImmBranch &Br);
> -
> - void computeBlockSize(MachineBasicBlock *MBB);
> - unsigned getOffsetOf(MachineInstr *MI) const;
> - void dumpBBs();
> - void verify();
> - };
> - char AArch64BranchFixup::ID = 0;
> -}
> -
> -/// Check that block offsets are consistent: each block must begin at or
> -/// after the previous block's postOffset.
> -void AArch64BranchFixup::verify() {
> -#ifndef NDEBUG
> - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
> - MBBI != E; ++MBBI) {
> - MachineBasicBlock *MBB = MBBI;
> - unsigned MBBId = MBB->getNumber();
> - assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
> - }
> -#endif
> -}
> -
> -/// Print block size and offset information (debugging aid).
> -void AArch64BranchFixup::dumpBBs() {
> - DEBUG({
> - for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
> - const BasicBlockInfo &BBI = BBInfo[J];
> - dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
> - << " kb=" << unsigned(BBI.KnownBits)
> - << " ua=" << unsigned(BBI.Unalign)
> - << format(" size=%#x\n", BBInfo[J].Size);
> - }
> - });
> -}
> -
> -/// Returns an instance of the branch fixup pass.
> -FunctionPass *llvm::createAArch64BranchFixupPass() {
> - return new AArch64BranchFixup();
> -}
> -
> -bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
> - MF = &mf;
> - DEBUG(dbgs() << "***** AArch64BranchFixup ******");
> - TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
> -
> - // This pass invalidates liveness information when it splits basic blocks.
> - MF->getRegInfo().invalidateLiveness();
> -
> - // Renumber all of the machine basic blocks in the function, guaranteeing that
> - // the numbers agree with the position of the block in the function.
> - MF->RenumberBlocks();
> -
> - // Do the initial scan of the function, building up information about the
> - // sizes of each block and location of each immediate branch.
> - initializeFunctionInfo();
> -
> - // Iteratively fix up branches until there is no change.
> - unsigned NoBRIters = 0;
> - bool MadeChange = false;
> - while (true) {
> - DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
> - bool BRChange = false;
> - for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
> - BRChange |= fixupImmediateBr(ImmBranches[i]);
> - if (BRChange && ++NoBRIters > 30)
> - report_fatal_error("Branch Fix Up pass failed to converge!");
> - DEBUG(dumpBBs());
> -
> - if (!BRChange)
> - break;
> - MadeChange = true;
> - }
> -
> - // After a while, this might be made debug-only, but it is not expensive.
> - verify();
> -
> - DEBUG(dbgs() << '\n'; dumpBBs());
> -
> - BBInfo.clear();
> - ImmBranches.clear();
> -
> - return MadeChange;
> -}
> -
> -/// Return true if the specified basic block can fall through into the block
> -/// immediately after it.
> -static bool BBHasFallthrough(MachineBasicBlock *MBB) {
> - // Get the next machine basic block in the function.
> - MachineFunction::iterator MBBI = MBB;
> - // Can't fall off end of function.
> - if (std::next(MBBI) == MBB->getParent()->end())
> - return false;
> -
> - MachineBasicBlock *NextBB = std::next(MBBI);
> - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
> - E = MBB->succ_end(); I != E; ++I)
> - if (*I == NextBB)
> - return true;
> -
> - return false;
> -}
> -
> -/// Do the initial scan of the function, building up information about the
> -/// size of each block and the location of each immediate branch.
> -void AArch64BranchFixup::initializeFunctionInfo() {
> - BBInfo.clear();
> - BBInfo.resize(MF->getNumBlockIDs());
> -
> - // First thing, compute the size of all basic blocks, and see if the function
> - // has any inline assembly in it. If so, we have to be conservative about
> - // alignment assumptions, as we don't know for sure the size of any
> - // instructions in the inline assembly.
> - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
> - computeBlockSize(I);
> -
> - // The known bits of the entry block offset are determined by the function
> - // alignment.
> - BBInfo.front().KnownBits = MF->getAlignment();
> -
> - // Compute block offsets and known bits.
> - adjustBBOffsetsAfter(MF->begin());
> -
> - // Now go back through the instructions and build up our data structures.
> - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
> - MBBI != E; ++MBBI) {
> - MachineBasicBlock &MBB = *MBBI;
> -
> - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> - I != E; ++I) {
> - if (I->isDebugValue())
> - continue;
> -
> - int Opc = I->getOpcode();
> - if (I->isBranch()) {
> - bool IsCond = false;
> -
> - // The offsets encoded in instructions here scale by the instruction
> - // size (4 bytes), effectively increasing their range by 2 bits.
> - unsigned Bits = 0;
> - switch (Opc) {
> - default:
> - continue; // Ignore other JT branches
> - case AArch64::TBZxii:
> - case AArch64::TBZwii:
> - case AArch64::TBNZxii:
> - case AArch64::TBNZwii:
> - IsCond = true;
> - Bits = 14 + 2;
> - break;
> - case AArch64::Bcc:
> - case AArch64::CBZx:
> - case AArch64::CBZw:
> - case AArch64::CBNZx:
> - case AArch64::CBNZw:
> - IsCond = true;
> - Bits = 19 + 2;
> - break;
> - case AArch64::Bimm:
> - Bits = 26 + 2;
> - break;
> - }
> -
> - // Record this immediate branch.
> - ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
> - }
> - }
> - }
> -}
> -
> -/// Compute the size and some alignment information for MBB. This function
> -/// updates BBInfo directly.
> -void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
> - BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
> - BBI.Size = 0;
> - BBI.Unalign = 0;
> -
> - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
> - ++I) {
> - BBI.Size += TII->getInstSizeInBytes(*I);
> - // For inline asm, getInstSizeInBytes returns a conservative estimate.
> - // The actual size may be smaller, but still a multiple of the instr size.
> - if (I->isInlineAsm())
> - BBI.Unalign = 2;
> - }
> -}
> -
> -/// Return the current offset of the specified machine instruction from the
> -/// start of the function. This offset changes as blocks are split, resized or
> -/// moved within the function.
> -unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
> - MachineBasicBlock *MBB = MI->getParent();
> -
> - // The offset is composed of two things: the sum of the sizes of all MBBs
> - // before this instruction's block, and the offset from the start of the block
> - // it is in.
> - unsigned Offset = BBInfo[MBB->getNumber()].Offset;
> -
> - // Sum instructions before MI in MBB.
> - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
> - assert(I != MBB->end() && "Didn't find MI in its own basic block?");
> - Offset += TII->getInstSizeInBytes(*I);
> - }
> - return Offset;
> -}
> -
> -/// Split the basic block containing MI into two blocks, which are joined by
> -/// an unconditional branch. Update data structures and renumber blocks to
> -/// account for this change and returns the newly created block.
> -MachineBasicBlock *
> -AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
> - MachineBasicBlock *OrigBB = MI->getParent();
> -
> - // Create a new MBB for the code after the OrigBB.
> - MachineBasicBlock *NewBB =
> - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
> - MachineFunction::iterator MBBI = OrigBB; ++MBBI;
> - MF->insert(MBBI, NewBB);
> -
> - // Splice the instructions starting with MI over to NewBB.
> - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
> -
> - // Add an unconditional branch from OrigBB to NewBB.
> - // Note the new unconditional branch is not being recorded.
> - // There doesn't seem to be meaningful DebugInfo available; this doesn't
> - // correspond to anything in the source.
> - BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
> - ++NumSplit;
> -
> - // Update the CFG. All succs of OrigBB are now succs of NewBB.
> - NewBB->transferSuccessors(OrigBB);
> -
> - // OrigBB branches to NewBB.
> - OrigBB->addSuccessor(NewBB);
> -
> - // Update internal data structures to account for the newly inserted MBB.
> - MF->RenumberBlocks(NewBB);
> -
> - // Insert an entry into BBInfo to align it properly with the (newly
> - // renumbered) block numbers.
> - BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
> -
> - // Figure out how large the OrigBB is. As the first half of the original
> - // block, it cannot contain a tablejump. The size includes
> - // the new jump we added. (It should be possible to do this without
> - // recounting everything, but it's very confusing, and this is rarely
> - // executed.)
> - computeBlockSize(OrigBB);
> -
> - // Figure out how large the NewBB is. As the second half of the original
> - // block, it may contain a tablejump.
> - computeBlockSize(NewBB);
> -
> - // All BBOffsets following these blocks must be modified.
> - adjustBBOffsetsAfter(OrigBB);
> -
> - return NewBB;
> -}
> -
> -void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
> - unsigned BBNum = BB->getNumber();
> - for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
> - // Get the offset and known bits at the end of the layout predecessor.
> - // Include the alignment of the current block.
> - unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
> - unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
> - unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
> -
> - // This is where block i begins. Stop if the offset is already correct,
> - // and we have updated 2 blocks. This is the maximum number of blocks
> - // changed before calling this function.
> - if (i > BBNum + 2 &&
> - BBInfo[i].Offset == Offset &&
> - BBInfo[i].KnownBits == KnownBits)
> - break;
> -
> - BBInfo[i].Offset = Offset;
> - BBInfo[i].KnownBits = KnownBits;
> - }
> -}
> -
> -/// Returns true if the distance between the specified MI and the specified BB
> -/// can fit in MI's displacement field.
> -bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
> - MachineBasicBlock *DestBB,
> - unsigned OffsetBits) {
> - int64_t BrOffset = getOffsetOf(MI);
> - int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
> -
> - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
> - << " from BB#" << MI->getParent()->getNumber()
> - << " bits available=" << OffsetBits
> - << " from " << getOffsetOf(MI) << " to " << DestOffset
> - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
> -
> - return isIntN(OffsetBits, DestOffset - BrOffset);
> -}
> -
> -/// Fix up an immediate branch whose destination is too far away to fit in its
> -/// displacement field.
> -bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
> - MachineInstr *MI = Br.MI;
> - MachineBasicBlock *DestBB = nullptr;
> - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
> - if (MI->getOperand(i).isMBB()) {
> - DestBB = MI->getOperand(i).getMBB();
> - break;
> - }
> - }
> - assert(DestBB && "Branch with no destination BB?");
> -
> - // Check to see if the DestBB is already in-range.
> - if (isBBInRange(MI, DestBB, Br.OffsetBits))
> - return false;
> -
> - assert(Br.IsCond && "Only conditional branches should need fixup");
> - return fixupConditionalBr(Br);
> -}
> -
> -/// Fix up a conditional branch whose destination is too far away to fit in its
> -/// displacement field. It is converted to an inverse conditional branch + an
> -/// unconditional branch to the destination.
> -bool
> -AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
> - MachineInstr *MI = Br.MI;
> - MachineBasicBlock *MBB = MI->getParent();
> - unsigned CondBrMBBOperand = 0;
> -
> - // The general idea is to add an unconditional branch to the destination and
> - // invert the conditional branch to jump over it. Complications occur around
> - // fallthrough and unreachable ends to the block.
> - // b.lt L1
> - // =>
> - // b.ge L2
> - // b L1
> - // L2:
> -
> - // First we invert the conditional branch, by creating a replacement if
> - // necessary. This if statement contains all the special handling of different
> - // branch types.
> - if (MI->getOpcode() == AArch64::Bcc) {
> - // The basic block is operand number 1 for Bcc
> - CondBrMBBOperand = 1;
> -
> - A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
> - CC = A64InvertCondCode(CC);
> - MI->getOperand(0).setImm(CC);
> - } else {
> - MachineInstrBuilder InvertedMI;
> - int InvertedOpcode;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("Unknown branch type");
> - case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
> - case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
> - case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
> - case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
> - case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
> - case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
> - case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
> - case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
> - }
> -
> - InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
> - for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
> - InvertedMI.addOperand(MI->getOperand(i));
> - if (MI->getOperand(i).isMBB())
> - CondBrMBBOperand = i;
> - }
> -
> - MI->eraseFromParent();
> - MI = Br.MI = InvertedMI;
> - }
> -
> - // If the branch is at the end of its MBB and that has a fall-through block,
> - // direct the updated conditional branch to the fall-through
> - // block. Otherwise, split the MBB before the next instruction.
> - MachineInstr *BMI = &MBB->back();
> - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
> -
> - ++NumCBrFixed;
> - if (BMI != MI) {
> - if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
> - BMI->getOpcode() == AArch64::Bimm) {
> - // Last MI in the BB is an unconditional branch. We can swap destinations:
> - // b.eq L1 (temporarily b.ne L1 after first change)
> - // b L2
> - // =>
> - // b.ne L2
> - // b L1
> - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
> - if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
> - DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
> - << *BMI);
> - MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
> - BMI->getOperand(0).setMBB(DestBB);
> - MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
> - return true;
> - }
> - }
> - }
> -
> - if (NeedSplit) {
> - MachineBasicBlock::iterator MBBI = MI; ++MBBI;
> - splitBlockBeforeInstr(MBBI);
> - // No need for the branch to the next block. We're adding an unconditional
> - // branch to the destination.
> - int delta = TII->getInstSizeInBytes(MBB->back());
> - BBInfo[MBB->getNumber()].Size -= delta;
> - MBB->back().eraseFromParent();
> - // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
> - }
> -
> - // After splitting and removing the unconditional branch from the original BB,
> - // the structure is now:
> - // oldbb:
> - // [things]
> - // b.invertedCC L1
> - // splitbb/fallthroughbb:
> - // [old b L2/real continuation]
> - //
> - // We now have to change the conditional branch to point to splitbb and add an
> - // unconditional branch after it to L1, giving the final structure:
> - // oldbb:
> - // [things]
> - // b.invertedCC splitbb
> - // b L1
> - // splitbb/fallthroughbb:
> - // [old b L2/real continuation]
> - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
> -
> - DEBUG(dbgs() << " Insert B to BB#"
> - << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
> - << " also invert condition and change dest. to BB#"
> - << NextBB->getNumber() << "\n");
> -
> - // Insert a new unconditional branch and fixup the destination of the
> - // conditional one. Also update the ImmBranch as well as adding a new entry
> - // for the new branch.
> - BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
> - .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
> - MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
> -
> - BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
> -
> - // 26 bits written down in Bimm, specifying a multiple of 4.
> - unsigned OffsetBits = 26 + 2;
> - ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
> -
> - adjustBBOffsetsAfter(MBB);
> - return true;
> -}
>
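For reference, the OffsetBits values recorded by this pass translate into the following signed byte reaches; isIntN(N, x) accepts x in [-2^(N-1), 2^(N-1)). A self-contained sanity check (illustrative only):

    #include <cstdint>
    // Half-range of an N-bit signed, instruction-scaled displacement, in bytes.
    constexpr int64_t reach(unsigned OffsetBits) {
      return INT64_C(1) << (OffsetBits - 1);
    }
    static_assert(reach(14 + 2) == 32 * 1024,         "tbz/tbnz: +/-32 KiB");
    static_assert(reach(19 + 2) == 1024 * 1024,       "b.cc/cbz/cbnz: +/-1 MiB");
    static_assert(reach(26 + 2) == 128 * 1024 * 1024, "b: +/-128 MiB");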
> Removed: llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td (removed)
> @@ -1,197 +0,0 @@
> -//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-==//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -// This describes the calling conventions for the AArch64 architecture.
> -//===----------------------------------------------------------------------===//
> -
> -
> -// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
> -// higher level of abstraction than LLVM's target interface presents. In
> -// particular, it refers (like other ABIs, in fact) directly to
> -// structs. However, generic LLVM code takes the liberty of lowering structure
> -// arguments to the component fields before we see them.
> -//
> -// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
> -// implemented, so the goals of this calling convention are, in decreasing
> -// priority order:
> -// 1. Expose *some* way to express the concepts required to implement the
> -// generic PCS from a front-end.
> -// 2. Provide a sane ABI for pure LLVM.
> -// 3. Follow the generic PCS as closely as is naturally possible.
> -//
> -// The suggested front-end implementation of PCS features is:
> -// * Integer, float and vector arguments of all sizes which end up in
> -// registers are passed and returned via the natural LLVM type.
> -// * Structure arguments with size <= 16 bytes are passed and returned in
> -// registers as similar integer or composite types. For example:
> -// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
> -// * HFAs in registers follow rules similar to small structs: appropriate
> -// composite types.
> -// * Structure arguments with size > 16 bytes are passed via a pointer,
> -// handled completely by the front-end.
> -// * Structure return values > 16 bytes via an sret pointer argument.
> -// * Other stack-based arguments (not large structs) are passed using byval
> -// pointers. Padding arguments are added beforehand to guarantee a large
> -// struct doesn't later use integer registers.
> -//
> -// N.b. this means that it is the front-end's responsibility (if it cares about
> -// PCS compliance) to check whether enough registers are available for an
> -// argument when deciding how to pass it.
> -
> -class CCIfAlign<int Align, CCAction A>:
> - CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
> -
> -def CC_A64_APCS : CallingConv<[
> - // SRet is an LLVM-specific concept, so it takes precedence over general ABI
> - // concerns. However, this rule will be used by C/C++ frontends to implement
> - // structure return.
> - CCIfSRet<CCAssignToReg<[X8]>>,
> -
> - // Put ByVal arguments directly on the stack. Minimum size and alignment of a
> - // slot is 64-bit.
> - CCIfByVal<CCPassByVal<8, 8>>,
> -
> - // Canonicalise the various types that live in different floating-point
> - // registers. This makes sense because the PCS does not distinguish Short
> - // Vectors and Floating-point types.
> - CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
> - CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType<f32>>,
> - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
> - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
> - CCBitConvertToType<f128>>,
> -
> - // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
> - // Floating-point or Short Vector Type and the NSRN is less than 8, then the
> - // argument is allocated to the least significant bits of register
> - // v[NSRN]. The NSRN is incremented by one. The argument has now been
> - // allocated."
> - CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
> - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
> - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
> - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
> - CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
> -
> - // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
> - // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
> - // argument is allocated to SIMD and Floating-point registers (with one
> - // register per element of the HFA). The NSRN is incremented by the number of
> - // registers used. The argument has now been allocated."
> - //
> - // N.b. As above, this rule is the responsibility of the front-end.
> -
> - // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
> - // the argument is rounded up to the nearest multiple of 8 bytes."
> - //
> - // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
> - // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
> - // Alignment of the Argument's type."
> - //
> - // It is expected that these will be satisfied by adding dummy arguments to
> - // the prototype.
> -
> - // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
> - // type then the size of the argument is set to 8 bytes. The effect is as if
> - // the argument had been copied to the least significant bits of a 64-bit
> - // register and the remaining bits filled with unspecified values."
> - CCIfType<[f16, f32], CCPromoteToType<f64>>,
> -
> - // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
> - // precision Floating-point or Short Vector Type, then the argument is copied
> - // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
> - // argument. The argument has now been allocated."
> - CCIfType<[f64], CCAssignToStack<8, 8>>,
> - CCIfType<[f128], CCAssignToStack<16, 16>>,
> -
> - // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
> - // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
> - // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
> - // one. The argument has now been allocated."
> -
> - // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
> - // represented as two i64s, the first one being split. If we delayed this
> - // operation C.8 would never be reached.
> - CCIfType<[i64],
> - CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
> -
> - // Note: the promotion also implements C.14.
> - CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
> -
> - // And now the real implementation of C.7
> - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
> -
> - // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
> - // up to the next even number."
> - //
> - // "C.9: If the argument is an Integral Type, the size of the argument is
> - // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
> - // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
> - // memory representation of the argument. The NGRN is incremented by two. The
> - // argument has now been allocated."
> - //
> - // Subtlety here: what if alignment is 16 but it is not an integral type? All
> - // floating-point types have been allocated already, which leaves composite
> - // types: this is why a front-end may need to produce i128 for a struct <= 16
> - // bytes.
> -
> - // PCS: "C.10 If the argument is a Composite Type and the size in double-words
> - // of the argument is not more than 8 minus NGRN, then the argument is copied
> - // into consecutive general-purpose registers, starting at x[NGRN]. The
> - // argument is passed as though it had been loaded into the registers from a
> - // double-word aligned address with an appropriate sequence of LDR
> - // instructions loading consecutive registers from memory (the contents of any
> - // unused parts of the registers are unspecified by this standard). The NGRN
> - // is incremented by the number of registers used. The argument has now been
> - // allocated."
> - //
> - // Another one that's the responsibility of the front-end (sigh).
> -
> - // PCS: "C.11: The NGRN is set to 8."
> - CCCustom<"CC_AArch64NoMoreRegs">,
> -
> - // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
> - // Alignment of the argument's type."
> - //
> - // PCS: "C.13: If the argument is a composite type then the argument is copied
> - // to memory at the adjusted NSAA. The NSAA is incremented by the size of
> - // the argument. The argument has now been allocated."
> - //
> - // Note that the effect of this corresponds to a memcpy rather than register
> - // stores so that the struct ends up correctly addressable at the adjusted
> - // NSAA.
> -
> - // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
> - // of the argument is set to 8 bytes. The effect is as if the argument was
> - // copied to the least significant bits of a 64-bit register and the remaining
> - // bits filled with unspecified values."
> - //
> - // Integer types were widened above. Floating-point and composite types have
> - // already been allocated completely. Nothing to do.
> -
> - // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
> - // is incremented by the size of the argument. The argument has now been
> - // allocated."
> - CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
> - CCIfType<[i64], CCAssignToStack<8, 8>>
> -
> -]>;
> -
> -// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
> -// of vector registers (8-15) are callee-saved. The order here is picked up
> -// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
> -// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
> -// [sp-16], ...
> -def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
> - (sequence "D%u", 15, 8))>;
> -
> -
> -// TLS descriptor calls are extremely restricted in their changes, to allow
> -// optimisations in the (hopefully) more common fast path where no real action
> -// is needed. They actually have to preserve all registers, except for the
> -// unavoidable X30 and the return register X0.
> -def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
> - (sequence "Q%u", 31, 0))>;
>
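The C.8/C.9 even-register rule that the CCIfSplit line implements is easiest to see from the source level. Under the PCS a 16-byte-aligned integral argument starts at an even-numbered GPR, so in the sketch below `a` lands in x0, x1 is skipped, and `b` occupies x2/x3 (hypothetical example, not from this patch):

    __int128 f(long a, __int128 b) {
      return b + (__int128)a;  // a: x0; b: x2 (low), x3 (high); x1 unused
    }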
> Removed: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (removed)
> @@ -1,626 +0,0 @@
> -//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the AArch64 implementation of the TargetFrameLowering
> -// class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64FrameLowering.h"
> -#include "AArch64InstrInfo.h"
> -#include "AArch64MachineFunctionInfo.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -#include "llvm/CodeGen/MachineFunction.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineMemOperand.h"
> -#include "llvm/CodeGen/MachineModuleInfo.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/CodeGen/RegisterScavenging.h"
> -#include "llvm/IR/Function.h"
> -#include "llvm/MC/MachineLocation.h"
> -#include "llvm/Support/Debug.h"
> -#include "llvm/Support/ErrorHandling.h"
> -
> -using namespace llvm;
> -
> -void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
> - uint64_t &Initial,
> - uint64_t &Residual) const {
> - // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
> - // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
> - // 0x1f8, but stack adjustment should always be a multiple of 16.
> - if (Total <= 0x1f0) {
> - Initial = Total;
> - Residual = 0;
> - } else {
> - Initial = 0x1f0;
> - Residual = Total - Initial;
> - }
> -}
> -
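Concretely: a 0x200-byte frame is split as Initial = 0x1f0 and Residual = 0x10, keeping the callee-save STP/LDP offsets within their 7-bit scaled immediates, while anything up to 0x1f0 is handled in a single adjustment. A constexpr restatement of the split (mirror of the logic above, illustrative only):

    constexpr unsigned long initialAdjust(unsigned long Total) {
      return Total <= 0x1f0 ? Total : 0x1f0;
    }
    static_assert(initialAdjust(0x180) == 0x180, "small frame: one adjustment");
    static_assert(initialAdjust(0x200) == 0x1f0, "large frame: split at 0x1f0");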
> -void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
> - AArch64MachineFunctionInfo *FuncInfo =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> - MachineBasicBlock &MBB = MF.front();
> - MachineBasicBlock::iterator MBBI = MBB.begin();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
> -
> - MachineModuleInfo &MMI = MF.getMMI();
> - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
> - bool NeedsFrameMoves = MMI.hasDebugInfo()
> - || MF.getFunction()->needsUnwindTableEntry();
> -
> - uint64_t NumInitialBytes, NumResidualBytes;
> -
> - // Currently we expect the stack to be laid out by
> - // sub sp, sp, #initial
> - // stp x29, x30, [sp, #offset]
> - // ...
> - // str xxx, [sp, #offset]
> - // sub sp, sp, #rest (possibly via extra instructions).
> - if (MFI->getCalleeSavedInfo().size()) {
> - // If there are callee-saved registers, we want to store them efficiently as
> - // a block, and virtual base assignment happens too early to do it for us, so
> - // we adjust the stack in two phases: first just for callee-saved fiddling,
> - // then to allocate the rest of the frame.
> - splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
> - } else {
> - // If there aren't any callee-saved registers, two-phase adjustment is
> - // inefficient. It's more efficient to adjust with NumInitialBytes too
> - // because when we're in a "callee pops argument space" situation, that pop
> - // must be tacked onto Initial for correctness.
> - NumInitialBytes = MFI->getStackSize();
> - NumResidualBytes = 0;
> - }
> -
> - // Tell everyone else how much adjustment we're expecting them to use. In
> - // particular if an adjustment is required for a tail call the epilogue could
> - // have a different view of things.
> - FuncInfo->setInitialStackAdjust(NumInitialBytes);
> -
> - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
> - MachineInstr::FrameSetup);
> -
> - if (NeedsFrameMoves && NumInitialBytes) {
> - // We emit this update even if the CFA is set from a frame pointer later so
> - // that the CFA is valid in the interim.
> - MachineLocation Dst(MachineLocation::VirtualFP);
> - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
> - unsigned CFIIndex = MMI.addFrameInst(
> - MCCFIInstruction::createDefCfa(nullptr, Reg, -NumInitialBytes));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> -
> - // Otherwise we need to set the frame pointer and/or add a second stack
> - // adjustment.
> -
> - bool FPNeedsSetting = hasFP(MF);
> - for (; MBBI != MBB.end(); ++MBBI) {
> - // Note that this search makes strong assumptions about the operation used
> - // to store the frame-pointer: it must be "STP x29, x30, ...". This could
> - // change in future, but until then there's no point in implementing
> - // more generic, untestable cases.
> - if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
> - && MBBI->getOperand(0).getReg() == AArch64::X29) {
> - int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
> - FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
> -
> - ++MBBI;
> - emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
> - AArch64::X29,
> - NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
> - MachineInstr::FrameSetup);
> -
> - // The offset adjustment used when emitting debugging locations relative
> - // to whatever frame base is set. AArch64 uses the default frame base (FP
> - // or SP) and this adjusts the calculations to be correct.
> - MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
> - - MFI->getStackSize());
> -
> - if (NeedsFrameMoves) {
> - unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true);
> - unsigned Offset = MFI->getObjectOffset(X29FrameIdx);
> - unsigned CFIIndex = MMI.addFrameInst(
> - MCCFIInstruction::createDefCfa(nullptr, Reg, Offset));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> -
> - FPNeedsSetting = false;
> - }
> -
> - if (!MBBI->getFlag(MachineInstr::FrameSetup))
> - break;
> - }
> -
> - assert(!FPNeedsSetting && "Frame pointer couldn't be set");
> -
> - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
> - MachineInstr::FrameSetup);
> -
> - // Now we emit the rest of the frame setup information, if necessary: we've
> - // already noted the FP and initial SP moves so we're left with the prologue's
> - // final SP update and callee-saved register locations.
> - if (!NeedsFrameMoves)
> - return;
> -
> - // The rest of the stack adjustment
> - if (!hasFP(MF) && NumResidualBytes) {
> - MachineLocation Dst(MachineLocation::VirtualFP);
> - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
> - unsigned Offset = NumResidualBytes + NumInitialBytes;
> - unsigned CFIIndex =
> - MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> -
> - // And any callee-saved registers (it's fine to leave them to the end here,
> - // because the old values are still valid at this point).
> - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
> - if (CSI.size()) {
> - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
> - E = CSI.end(); I != E; ++I) {
> - unsigned Offset = MFI->getObjectOffset(I->getFrameIdx());
> - unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true);
> - unsigned CFIIndex = MMI.addFrameInst(
> - MCCFIInstruction::createOffset(nullptr, Reg, Offset));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> - }
> -}
> -
> -void
> -AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
> - MachineBasicBlock &MBB) const {
> - AArch64MachineFunctionInfo *FuncInfo =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> -
> - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
> - DebugLoc DL = MBBI->getDebugLoc();
> - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - unsigned RetOpcode = MBBI->getOpcode();
> -
> - // Initial and residual are named for consistency with the prologue. Note that
> - // in the epilogue, the residual adjustment is executed first.
> - uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
> - uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
> - uint64_t ArgumentPopSize = 0;
> - if (RetOpcode == AArch64::TC_RETURNdi ||
> - RetOpcode == AArch64::TC_RETURNxi) {
> - MachineOperand &JumpTarget = MBBI->getOperand(0);
> - MachineOperand &StackAdjust = MBBI->getOperand(1);
> -
> - MachineInstrBuilder MIB;
> - if (RetOpcode == AArch64::TC_RETURNdi) {
> - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
> - if (JumpTarget.isGlobal()) {
> - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
> - JumpTarget.getTargetFlags());
> - } else {
> - assert(JumpTarget.isSymbol() && "unexpected tail call destination");
> - MIB.addExternalSymbol(JumpTarget.getSymbolName(),
> - JumpTarget.getTargetFlags());
> - }
> - } else {
> - assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
> - && "Unexpected tail call");
> -
> - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
> - MIB.addReg(JumpTarget.getReg(), RegState::Kill);
> - }
> -
> - // Add the extra operands onto the new tail call instruction even though
> - // they're not used directly (so that liveness is tracked properly etc).
> - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
> - MIB->addOperand(MBBI->getOperand(i));
> -
> -
> - // Delete the pseudo instruction TC_RETURN.
> - MachineInstr *NewMI = std::prev(MBBI);
> - MBB.erase(MBBI);
> - MBBI = NewMI;
> -
> - // For a tail-call in a callee-pops-arguments environment, some or all of
> - // the stack may actually be in use for the call's arguments; this is
> - // calculated during LowerCall and consumed here...
> - ArgumentPopSize = StackAdjust.getImm();
> - } else {
> - // ... otherwise the amount to pop is *all* of the argument space,
> - // conveniently stored in the MachineFunctionInfo by
> - // LowerFormalArguments. This will, of course, be zero for the C calling
> - // convention.
> - ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
> - }
> -
> - assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
> - && "refusing to adjust stack by misaligned amt");
> -
> - // We may need to address callee-saved registers differently, so find out the
> - // bound on the frame indices.
> - const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
> - int MinCSFI = 0;
> - int MaxCSFI = -1;
> -
> - if (CSI.size()) {
> - MinCSFI = CSI[0].getFrameIdx();
> - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
> - }
> -
> - // The "residual" stack update comes first from this direction and guarantees
> - // that SP is NumInitialBytes below its value on function entry, either by a
> - // direct update or restoring it from the frame pointer.
> - if (NumInitialBytes + ArgumentPopSize != 0) {
> - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
> - NumInitialBytes + ArgumentPopSize);
> - --MBBI;
> - }
> -
> -
> - // MBBI now points to the instruction just past the last callee-saved
> - // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
> - // otherwise).
> -
> - // Now we need to find out where to put the bulk of the stack adjustment.
> - MachineBasicBlock::iterator FirstEpilogue = MBBI;
> - while (MBBI != MBB.begin()) {
> - --MBBI;
> -
> - unsigned FrameOp;
> - for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
> - if (MBBI->getOperand(FrameOp).isFI())
> - break;
> - }
> -
> - // If this instruction doesn't have a frame index we've reached the end of
> - // the callee-save restoration.
> - if (FrameOp == MBBI->getNumOperands())
> - break;
> -
> - // Likewise if it *is* a local reference, but not to a callee-saved object.
> - int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
> - if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
> - break;
> -
> - FirstEpilogue = MBBI;
> - }
> -
> - if (MF.getFrameInfo()->hasVarSizedObjects()) {
> - int64_t StaticFrameBase;
> - StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
> - emitRegUpdate(MBB, FirstEpilogue, DL, TII,
> - AArch64::XSP, AArch64::X29, AArch64::NoRegister,
> - StaticFrameBase);
> - } else {
> - emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
> - }
> -}
> -
> -int64_t
> -AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
> - int FrameIndex,
> - unsigned &FrameReg,
> - int SPAdj,
> - bool IsCalleeSaveOp) const {
> - AArch64MachineFunctionInfo *FuncInfo =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> -
> - int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
> -
> - assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
> - && "callee-saved register in unexpected place");
> -
> - // If the frame for this function is particularly large, we adjust the stack
> - // in two phases which means the callee-save related operations see a
> - // different (intermediate) stack size.
> - int64_t FrameRegPos;
> - if (IsCalleeSaveOp) {
> - FrameReg = AArch64::XSP;
> - FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
> - } else if (useFPForAddressing(MF)) {
> - // Have to use the frame pointer since we have no idea where SP is.
> - FrameReg = AArch64::X29;
> - FrameRegPos = FuncInfo->getFramePointerOffset();
> - } else {
> - FrameReg = AArch64::XSP;
> - FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
> - }
> -
> - return TopOfFrameOffset - FrameRegPos;
> -}
> -
> -void
> -AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
> - RegScavenger *RS) const {
> - const AArch64RegisterInfo *RegInfo =
> - static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - const AArch64InstrInfo &TII =
> - *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
> -
> - if (hasFP(MF)) {
> - MF.getRegInfo().setPhysRegUsed(AArch64::X29);
> - MF.getRegInfo().setPhysRegUsed(AArch64::X30);
> - }
> -
> - // If addressing of local variables is going to be more complicated than
> - // shoving a base register and an offset into the instruction then we may well
> - // need to scavenge registers. We should either specifically add a
> - // callee-saved register for this purpose or allocate an extra spill slot.
> - bool BigStack =
> - MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)
> - || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
> - || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
> -
> - if (!BigStack)
> - return;
> -
> - // We certainly need some slack space for the scavenger, preferably an extra
> - // register.
> - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
> - MCPhysReg ExtraReg = AArch64::NoRegister;
> -
> - for (unsigned i = 0; CSRegs[i]; ++i) {
> - if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
> - !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
> - ExtraReg = CSRegs[i];
> - break;
> - }
> - }
> -
> - if (ExtraReg != 0) {
> - MF.getRegInfo().setPhysRegUsed(ExtraReg);
> - } else {
> - assert(RS && "Expect register scavenger to be available");
> -
> - // Create a stack slot for scavenging purposes. PrologEpilogInserter
> - // helpfully places it near either SP or FP for us to avoid
> - // infinite regression during scavenging.
> - const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
> - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
> - RC->getAlignment(),
> - false));
> - }
> -}
> -
> -bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
> - unsigned Reg) const {
> - // If @llvm.returnaddress is called then it will refer to X30 by some means;
> - // the prologue store does not kill the register.
> - if (Reg == AArch64::X30) {
> - if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
> - && MBB.getParent()->getRegInfo().isLiveIn(Reg))
> - return false;
> - }
> -
> - // In all other cases, physical registers are dead after they've been saved
> - // but live at the beginning of the prologue block.
> - MBB.addLiveIn(Reg);
> - return true;
> -}
> -
> -void
> -AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI,
> - const LoadStoreMethod PossClasses[],
> - unsigned NumClasses) const {
> - DebugLoc DL = MBB.findDebugLoc(MBBI);
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> -
> - // A certain amount of implicit contract is present here. The actual stack
> - // offsets haven't been allocated officially yet, so for strictly correct code
> - // we rely on the fact that the elements of CSI are allocated in order
> - // starting at SP, purely as dictated by size and alignment. In practice since
> - // this function handles the only accesses to those slots it's not quite so
> - // important.
> - //
> - // We have also ordered the Callee-saved register list in AArch64CallingConv
> - // so that the above scheme puts registers in order: in particular we want
> - // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
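> - //
> - // Sketch (assuming X29, X30 and X21 are being saved): the scheme gives
> - //   &X29 == base, &X30 == base + 8, &X21 == base + 16
> - // so a single STP of {X29, X30} lays down the frame record directly.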
> - for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
> - unsigned Reg = CSI[i].getReg();
> -
> - // First we need to find out which register class the register belongs to so
> - // that we can use the correct load/store instructions.
> - unsigned ClassIdx;
> - for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
> - if (PossClasses[ClassIdx].RegClass->contains(Reg))
> - break;
> - }
> - assert(ClassIdx != NumClasses
> - && "Asked to store register in unexpected class");
> - const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
> -
> - // Now we need to decide whether it's possible to emit a paired instruction:
> - // for this we want the next register to be in the same class.
> - MachineInstrBuilder NewMI;
> - bool Pair = false;
> - if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
> - Pair = true;
> - unsigned StLow = 0, StHigh = 0;
> - if (isPrologue) {
> - // Most of these registers will be live-in to the MBB and killed by our
> - // store, though there are exceptions (see determinePrologueDeath).
> - StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
> - StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
> - } else {
> - StLow = RegState::Define;
> - StHigh = RegState::Define;
> - }
> -
> - NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
> - .addReg(CSI[i+1].getReg(), StLow)
> - .addReg(CSI[i].getReg(), StHigh);
> -
> - // If it's a paired op, we've consumed two registers
> - ++i;
> - } else {
> - unsigned State;
> - if (isPrologue) {
> - State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
> - } else {
> - State = RegState::Define;
> - }
> -
> - NewMI = BuildMI(MBB, MBBI, DL,
> - TII.get(PossClasses[ClassIdx].SingleOpcode))
> - .addReg(CSI[i].getReg(), State);
> - }
> -
> - // Note that the FrameIdx refers to the second register in a pair: it will
> - // be allocated the smaller numeric address and so is the one an LDP/STP
> - // address must use.
> - int FrameIdx = CSI[i].getFrameIdx();
> - MachineMemOperand::MemOperandFlags Flags;
> - Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
> - MachineMemOperand *MMO =
> - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
> - Flags,
> - Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
> - MFI.getObjectAlignment(FrameIdx));
> -
> - NewMI.addFrameIndex(FrameIdx)
> - .addImm(0) // address-register offset
> - .addMemOperand(MMO);
> -
> - if (isPrologue)
> - NewMI.setMIFlags(MachineInstr::FrameSetup);
> -
> - // For aesthetic reasons, during an epilogue we want to emit complementary
> - // operations to the prologue, but in the opposite order. So we still
> - // iterate through the CalleeSavedInfo list in order, but we put the
> - // instructions successively earlier in the MBB.
> - if (!isPrologue)
> - --MBBI;
> - }
> -}
> -
> -bool
> -AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const {
> - if (CSI.empty())
> - return false;
> -
> - static const LoadStoreMethod PossibleClasses[] = {
> - {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
> - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
> - };
> - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
> -
> - emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
> - PossibleClasses, NumClasses);
> -
> - return true;
> -}
> -
> -bool
> -AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const {
> -
> - if (CSI.empty())
> - return false;
> -
> - static const LoadStoreMethod PossibleClasses[] = {
> - {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
> - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
> - };
> - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
> -
> - emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
> - PossibleClasses, NumClasses);
> -
> - return true;
> -}
> -
> -bool
> -AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
> - const MachineFrameInfo *MFI = MF.getFrameInfo();
> - const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
> -
> - // This is a question of ABI compliance. The AArch64 PCS gives various
> - // options for conformance, and even at the most stringent level more or
> - // less permits elimination for leaf functions because there's no loss of
> - // functionality (for debugging etc.).
> - if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
> - return true;
> -
> - // The following are hard-limits: incorrect code will be generated if we try
> - // to omit the frame.
> - return (RI->needsStackRealignment(MF) ||
> - MFI->hasVarSizedObjects() ||
> - MFI->isFrameAddressTaken());
> -}
> -
> -bool
> -AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
> - return MF.getFrameInfo()->hasVarSizedObjects();
> -}
> -
> -bool
> -AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
> - const MachineFrameInfo *MFI = MF.getFrameInfo();
> -
> - // Of the various reasons for having a frame pointer, it's actually only
> - // variable-sized objects that prevent reservation of a call frame.
> - return !(hasFP(MF) && MFI->hasVarSizedObjects());
> -}
> -
> -void
> -AArch64FrameLowering::eliminateCallFramePseudoInstr(
> - MachineFunction &MF,
> - MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI) const {
> - const AArch64InstrInfo &TII =
> - *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
> - DebugLoc dl = MI->getDebugLoc();
> - int Opcode = MI->getOpcode();
> - bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
> - uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
> -
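> - // In outline: without a reserved call frame, each call-frame pseudo below
> - // becomes an explicit SP adjustment; with one, only bytes popped by the
> - // callee itself still need re-allocating.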
> - if (!hasReservedCallFrame(MF)) {
> - unsigned Align = getStackAlignment();
> -
> - int64_t Amount = MI->getOperand(0).getImm();
> - Amount = RoundUpToAlignment(Amount, Align);
> - if (!IsDestroy) Amount = -Amount;
> -
> - // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
> - // doesn't have to pop anything), then the first operand will be zero too so
> - // this adjustment is a no-op.
> - if (CalleePopAmount == 0) {
> - // FIXME: in-function stack adjustment for calls is limited to 12-bits
> - // because there's no guaranteed temporary register available. Mostly call
> - // frames will be allocated at the start of a function so this is OK, but
> - // it is a limitation that needs dealing with.
> - assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
> - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
> - }
> - } else if (CalleePopAmount != 0) {
> - // If the calling convention demands that the callee pops arguments from the
> - // stack, we want to add it back if we have a reserved call frame.
> - assert(CalleePopAmount < 0xfff && "call frame too large");
> - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
> - }
> -
> - MBB.erase(MI);
> -}
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (removed)
> @@ -1,108 +0,0 @@
> -//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This class implements the AArch64-specific parts of the TargetFrameLowering
> -// class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_AARCH64_FRAMEINFO_H
> -#define LLVM_AARCH64_FRAMEINFO_H
> -
> -#include "AArch64Subtarget.h"
> -#include "llvm/Target/TargetFrameLowering.h"
> -
> -namespace llvm {
> -class AArch64Subtarget;
> -
> -class AArch64FrameLowering : public TargetFrameLowering {
> -private:
> - // In order to unify the spilling and restoring of callee-saved registers into
> - // emitFrameMemOps, we need to be able to specify which instructions to use
> - // for the relevant memory operations on each register class. An array of the
> - // following struct is populated and passed in to achieve this.
> - struct LoadStoreMethod {
> - const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
> -
> - // The preferred instruction.
> - unsigned PairOpcode; // E.g. LSPair64_STR
> -
> - // Sometimes only a single register can be handled at once.
> - unsigned SingleOpcode; // E.g. LS64_STR
> - };
> -protected:
> - const AArch64Subtarget &STI;
> -
> -public:
> - explicit AArch64FrameLowering(const AArch64Subtarget &sti)
> - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
> - STI(sti) {
> - }
> -
> - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
> - /// the function.
> - void emitPrologue(MachineFunction &MF) const override;
> - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
> -
> - /// Decides how much stack adjustment to perform in each phase of the prologue
> - /// and epilogue.
> - void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
> - uint64_t &Residual) const;
> -
> - int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
> - unsigned &FrameReg, int SPAdj,
> - bool IsCalleeSaveOp) const;
> -
> - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
> - RegScavenger *RS) const override;
> -
> - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const override;
> - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const override;
> -
> - void
> - eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI) const override;
> -
> - /// If the register is X30 (i.e. LR) and the return address is used in the
> - /// function then the callee-save store doesn't actually kill the register,
> - /// otherwise it does.
> - bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
> -
> - /// This function emits the loads or stores required during prologue and
> - /// epilogue as efficiently as possible.
> - ///
> - /// The operations involved in setting up and tearing down the frame are
> - /// similar enough to warrant a shared function, particularly as discrepancies
> - /// between the two would be disastrous.
> - void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI,
> - const LoadStoreMethod PossibleClasses[],
> - unsigned NumClasses) const;
> -
> -
> - bool hasFP(const MachineFunction &MF) const override;
> -
> - bool useFPForAddressing(const MachineFunction &MF) const;
> -
> - /// On AArch64, only the presence of variable-sized stack objects prevents
> - /// reserving a call frame.
> - bool hasReservedCallFrame(const MachineFunction &MF) const override;
> -
> -};
> -
> -} // End llvm namespace
> -
> -#endif
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (removed)
> @@ -1,1576 +0,0 @@
> -//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines an instruction selector for the AArch64 target.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64InstrInfo.h"
> -#include "AArch64Subtarget.h"
> -#include "AArch64TargetMachine.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/ADT/APSInt.h"
> -#include "llvm/CodeGen/SelectionDAGISel.h"
> -#include "llvm/IR/GlobalValue.h"
> -#include "llvm/Support/Debug.h"
> -#include "llvm/Support/raw_ostream.h"
> -
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "aarch64-isel"
> -
> -//===--------------------------------------------------------------------===//
> -/// AArch64 specific code to select AArch64 machine instructions for
> -/// SelectionDAG operations.
> -///
> -namespace {
> -
> -class AArch64DAGToDAGISel : public SelectionDAGISel {
> - AArch64TargetMachine &TM;
> -
> - /// Keep a pointer to the AArch64Subtarget around so that we can
> - /// make the right decision when generating code for different targets.
> - const AArch64Subtarget *Subtarget;
> -
> -public:
> - explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
> - CodeGenOpt::Level OptLevel)
> - : SelectionDAGISel(tm, OptLevel), TM(tm),
> - Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
> - }
> -
> - const char *getPassName() const override {
> - return "AArch64 Instruction Selection";
> - }
> -
> - // Include the pieces autogenerated from the target description.
> -#include "AArch64GenDAGISel.inc"
> -
> - template<unsigned MemSize>
> - bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
> - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
> - if (!CN || CN->getZExtValue() % MemSize != 0
> - || CN->getZExtValue() / MemSize > 0xfff)
> - return false;
> -
> - UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
> - return true;
> - }
> -
> - template<unsigned RegWidth>
> - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
> - return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
> - }
> -
> - /// Used for pre-lowered address-reference nodes, so we already know
> - /// the fields match. This operand's job is simply to add an
> - /// appropriate shift operand to the MOVZ/MOVK instruction.
> - template<unsigned LogShift>
> - bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
> - Imm = N;
> - Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
> - return true;
> - }
> -
> - bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
> -
> - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
> - unsigned RegWidth);
> -
> - bool SelectInlineAsmMemoryOperand(const SDValue &Op,
> - char ConstraintCode,
> - std::vector<SDValue> &OutOps) override;
> -
> - bool SelectLogicalImm(SDValue N, SDValue &Imm);
> -
> - template<unsigned RegWidth>
> - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
> - return SelectTSTBOperand(N, FixedPos, RegWidth);
> - }
> -
> - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
> -
> - SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
> - unsigned Op64);
> -
> - /// Put the given constant into a pool and return a DAG which will give its
> - /// address.
> - SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
> -
> - SDNode *TrySelectToMoveImm(SDNode *N);
> - SDNode *LowerToFPLitPool(SDNode *Node);
> - SDNode *SelectToLitPool(SDNode *N);
> -
> - SDNode* Select(SDNode*) override;
> -private:
> - /// Get the opcode for a table-lookup instruction.
> - unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
> -
> - /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
> - /// IsExt indicates whether the result will be extended with an argument.
> - SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
> -
> - /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
> - SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcode);
> -
> - /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
> - SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcodes);
> -
> - /// Form sequences of consecutive 64/128-bit registers for use in NEON
> - /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
> - /// between 1 and 4 elements. If it contains a single element, that element
> - /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
> - SDValue createDTuple(ArrayRef<SDValue> Vecs);
> - SDValue createQTuple(ArrayRef<SDValue> Vecs);
> -
> - /// Generic helper for the createDTuple/createQTuple
> - /// functions. Those should almost always be called instead.
> - SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
> - unsigned SubRegs[]);
> -
> - /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
> - /// The opcode array specifies the instructions used for load.
> - SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcodes);
> -
> - /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
> - /// The opcode arrays specify the instructions used for load/store.
> - SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
> - unsigned NumVecs, const uint16_t *Opcodes);
> -
> - SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
> - SDValue Operand);
> -};
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
> - unsigned RegWidth) {
> - const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
> - if (!CN) return false;
> -
> - // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
> - // is between 1 and 32 for a destination w-register, or 1 and 64 for an
> - // x-register.
> - //
> - // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
> - // want THIS_NODE to be 2^fbits. This is much easier to deal with using
> - // integers.
> - bool IsExact;
> -
> - // fbits is between 1 and 64 in the worst-case, which means the fmul
> - // could have 2^64 as an actual operand. Need 65 bits of precision.
> - APSInt IntVal(65, true);
> - CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
> -
> - // N.b. isPowerOf2 also checks for > 0.
> - if (!IsExact || !IntVal.isPowerOf2()) return false;
> - unsigned FBits = IntVal.logBase2();
> -
> - // Checks above should have guaranteed that we haven't lost information in
> - // finding FBits, but it must still be in range.
> - if (FBits == 0 || FBits > RegWidth) return false;
> -
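> - // Worked example: (fp_to_sint (fmul x, 16.0)) reaches this point with
> - // IntVal == 16, so FBits == 4 and the pattern selects an FCVTZS with 4
> - // fractional bits (the constant created below encodes 64 - FBits).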
> - FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
> - return true;
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
> - char ConstraintCode,
> - std::vector<SDValue> &OutOps) {
> - switch (ConstraintCode) {
> - default: llvm_unreachable("Unrecognised AArch64 memory constraint");
> - case 'm':
> - // FIXME: more freedom is actually permitted for 'm'. We can go
> - // hunting for a base and an offset if we want. Of course, since
> - // we don't really know how the operand is going to be used we're
> - // probably restricted to the load/store pair's simm7 as an offset
> - // range anyway.
> - case 'Q':
> - OutOps.push_back(Op);
> - }
> -
> - return false;
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
> - ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
> - if (!Imm || !Imm->getValueAPF().isPosZero())
> - return false;
> -
> - // Doesn't actually carry any information, but keeps TableGen quiet.
> - Dummy = CurDAG->getTargetConstant(0, MVT::i32);
> - return true;
> -}
> -
> -bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
> - uint32_t Bits;
> - uint32_t RegWidth = N.getValueType().getSizeInBits();
> -
> - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
> - if (!CN) return false;
> -
> - if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
> - return false;
> -
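> - // A64 logical immediates are a rotated run of ones replicated across the
> - // register: e.g. 0x00ff00ff00ff00ff is encodable, while most arbitrary
> - // 64-bit constants are not.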
> - Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
> - return true;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
> - SDNode *ResNode;
> - SDLoc dl(Node);
> - EVT DestType = Node->getValueType(0);
> - unsigned DestWidth = DestType.getSizeInBits();
> -
> - unsigned MOVOpcode;
> - EVT MOVType;
> - int UImm16, Shift;
> - uint32_t LogicalBits;
> -
> - uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
> - if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
> - MOVType = DestType;
> - MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
> - } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
> - MOVType = DestType;
> - MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
> - } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
> - // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
> - // use a 32-bit instruction: "movn w0, 0xedcb".
> - MOVType = MVT::i32;
> - MOVOpcode = AArch64::MOVNwii;
> - } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
> - MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
> - uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
> -
> - return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
> - CurDAG->getRegister(ZR, DestType),
> - CurDAG->getTargetConstant(LogicalBits, MVT::i32));
> - } else {
> - // Can't handle it in one instruction. There's scope for permitting two (or
> - // more) instructions, but that'll need more thought.
> - return nullptr;
> - }
> -
> - ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
> - CurDAG->getTargetConstant(UImm16, MVT::i32),
> - CurDAG->getTargetConstant(Shift, MVT::i32));
> -
> - if (MOVType != DestType) {
> - ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
> - MVT::i64, MVT::i32, MVT::Other,
> - CurDAG->getTargetConstant(0, MVT::i64),
> - SDValue(ResNode, 0),
> - CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
> - }
> -
> - return ResNode;
> -}
> -
> -SDValue
> -AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
> - const Constant *CV) {
> - EVT PtrVT = getTargetLowering()->getPointerTy();
> -
> - switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
> - case CodeModel::Small: {
> - unsigned Alignment =
> - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
> - return CurDAG->getNode(
> - AArch64ISD::WrapperSmall, DL, PtrVT,
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
> - CurDAG->getConstant(Alignment, MVT::i32));
> - }
> - case CodeModel::Large: {
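> - // The nodes below build the standard large-model materialisation, 16 bits
> - // at a time. Roughly, in assembly (illustrative pool label assumed):
> - //   movz x0, #:abs_g3:.LCPI0_0    // bits [48,64)
> - //   movk x0, #:abs_g2_nc:.LCPI0_0 // bits [32,48)
> - //   movk x0, #:abs_g1_nc:.LCPI0_0 // bits [16,32)
> - //   movk x0, #:abs_g0_nc:.LCPI0_0 // bits [0,16)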
> - SDNode *LitAddr;
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVZxii, DL, PtrVT,
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
> - CurDAG->getTargetConstant(3, MVT::i32));
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
> - CurDAG->getTargetConstant(2, MVT::i32));
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
> - CurDAG->getTargetConstant(1, MVT::i32));
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
> - CurDAG->getTargetConstant(0, MVT::i32));
> - return SDValue(LitAddr, 0);
> - }
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
> - SDLoc DL(Node);
> - uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
> - int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
> - EVT DestType = Node->getValueType(0);
> -
> - // Since we may end up loading a 64-bit constant from a 32-bit entry, the
> - // constant in the pool may have a different type to the eventual node.
> - ISD::LoadExtType Extension;
> - EVT MemType;
> -
> - assert((DestType == MVT::i64 || DestType == MVT::i32)
> - && "Only expect integer constants at the moment");
> -
> - if (DestType == MVT::i32) {
> - Extension = ISD::NON_EXTLOAD;
> - MemType = MVT::i32;
> - } else if (UnsignedVal <= UINT32_MAX) {
> - Extension = ISD::ZEXTLOAD;
> - MemType = MVT::i32;
> - } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
> - Extension = ISD::SEXTLOAD;
> - MemType = MVT::i32;
> - } else {
> - Extension = ISD::NON_EXTLOAD;
> - MemType = MVT::i64;
> - }
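> - // E.g. 0xffffffffffffff00 has SignedVal == -256, so it is stored as the
> - // 32-bit pool entry 0xffffff00 and sign-extended when loaded, halving the
> - // constant-pool footprint.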
> -
> - Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
> - MemType.getSizeInBits()),
> - UnsignedVal);
> - SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
> - unsigned Alignment =
> - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
> -
> - return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
> - PoolAddr,
> - MachinePointerInfo::getConstantPool(), MemType,
> - /* isVolatile = */ false,
> - /* isNonTemporal = */ false,
> - Alignment).getNode();
> -}
> -
> -SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
> - SDLoc DL(Node);
> - const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
> - EVT DestType = Node->getValueType(0);
> -
> - unsigned Alignment =
> - getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
> - SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
> -
> - return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
> - MachinePointerInfo::getConstantPool(),
> - /* isVolatile = */ false,
> - /* isNonTemporal = */ false,
> - /* isInvariant = */ true,
> - Alignment).getNode();
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
> - unsigned RegWidth) {
> - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
> - if (!CN) return false;
> -
> - uint64_t Val = CN->getZExtValue();
> -
> - if (!isPowerOf2_64(Val)) return false;
> -
> - unsigned TestedBit = Log2_64(Val);
> - // Checks above should have guaranteed that we haven't lost information in
> - // finding TestedBit, but it must still be in range.
> - if (TestedBit >= RegWidth) return false;
> -
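> - // Example: a mask of 8 tests bit 3, so FixedPos becomes the constant 3
> - // (as used by the TBZ/TBNZ patterns); a mask such as 12 was rejected
> - // above as not a power of two.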
> - FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
> - return true;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
> - unsigned Op16,unsigned Op32,
> - unsigned Op64) {
> - // Mostly direct translation to the given operations, except that we preserve
> - // the AtomicOrdering for use later on.
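> - // (Keeping the ordering means the later expansion of these pseudos into
> - // load/store-exclusive loops can use acquire/release variants only where
> - // the ordering actually demands them.)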
> - AtomicSDNode *AN = cast<AtomicSDNode>(Node);
> - EVT VT = AN->getMemoryVT();
> -
> - unsigned Op;
> - if (VT == MVT::i8)
> - Op = Op8;
> - else if (VT == MVT::i16)
> - Op = Op16;
> - else if (VT == MVT::i32)
> - Op = Op32;
> - else if (VT == MVT::i64)
> - Op = Op64;
> - else
> - llvm_unreachable("Unexpected atomic operation");
> -
> - SmallVector<SDValue, 4> Ops;
> - for (unsigned i = 1; i < AN->getNumOperands(); ++i)
> - Ops.push_back(AN->getOperand(i));
> -
> - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
> - Ops.push_back(AN->getOperand(0)); // Chain moves to the end
> -
> - return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, Ops);
> -}
> -
> -SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
> - static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
> - AArch64::DTripleRegClassID,
> - AArch64::DQuadRegClassID };
> - static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
> - AArch64::dsub_2, AArch64::dsub_3 };
> -
> - return createTuple(Regs, RegClassIDs, SubRegs);
> -}
> -
> -SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
> - static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
> - AArch64::QTripleRegClassID,
> - AArch64::QQuadRegClassID };
> - static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
> - AArch64::qsub_2, AArch64::qsub_3 };
> -
> - return createTuple(Regs, RegClassIDs, SubRegs);
> -}
> -
> -SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
> - unsigned RegClassIDs[],
> - unsigned SubRegs[]) {
> - // There's no special register-class for a vector-list of 1 element: it's just
> - // a vector.
> - if (Regs.size() == 1)
> - return Regs[0];
> -
> - assert(Regs.size() >= 2 && Regs.size() <= 4);
> -
> - SDLoc DL(Regs[0].getNode());
> -
> - SmallVector<SDValue, 4> Ops;
> -
> - // First operand of REG_SEQUENCE is the desired RegClass.
> - Ops.push_back(
> - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
> -
> - // Then we get pairs of source & subregister-position for the components.
> - for (unsigned i = 0; i < Regs.size(); ++i) {
> - Ops.push_back(Regs[i]);
> - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
> - }
> -
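> - // For a triple of D registers, for example, the node built below is:
> - //   REG_SEQUENCE DTripleRegClassID, Regs[0], dsub_0, Regs[1], dsub_1,
> - //                Regs[2], dsub_2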
> - SDNode *N =
> - CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
> - return SDValue(N, 0);
> -}
> -
> -
> -// Get the register-stride update opcode of a VLD/VST instruction that is
> -// otherwise equivalent to the given fixed-stride updating instruction.
> -static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
> - switch (Opc) {
> - default: break;
> - case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
> - case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
> - case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
> - case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
> - case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
> - case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
> - case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
> - case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
> -
> - case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
> - case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
> - case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
> - case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
> - case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
> - case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
> - case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
> -
> - case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
> - case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
> - case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
> - case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
> - case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
> - case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
> - case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
> -
> - case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
> - case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
> - case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
> - case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
> - case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
> - case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
> - case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
> -
> - case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
> - case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
> - case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
> - case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
> - case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
> - case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
> - case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
> - case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;
> -
> - case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
> - case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
> - case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
> - case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
> - case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
> - case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
> - case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
> - case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;
> -
> - case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
> - case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
> - case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
> - case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
> - case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
> - case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
> - case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
> - case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;
> -
> - case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
> - case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
> - case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
> - case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
> - case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
> - case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
> - case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
> - case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
> -
> - case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
> - case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
> - case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
> - case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
> - case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
> - case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
> - case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
> -
> - case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
> - case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
> - case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
> - case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
> - case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
> - case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
> - case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
> -
> - case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
> - case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
> - case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
> - case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
> - case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
> - case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
> - case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
> -
> - case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
> - case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
> - case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
> - case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
> - case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
> - case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
> - case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
> - case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;
> -
> - case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
> - case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
> - case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
> - case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
> - case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
> - case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
> - case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
> - case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;
> -
> - case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
> - case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
> - case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
> - case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
> - case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
> - case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
> - case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
> - case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
> -
> - // Post-index of duplicate loads
> - case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
> - case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
> - case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
> - case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
> - case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
> - case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
> - case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
> - case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
> -
> - case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
> - case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
> - case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
> - case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
> - case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
> - case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
> - case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
> - case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
> -
> - case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
> - case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
> - case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
> - case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
> - case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
> - case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
> - case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
> - case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
> -
> - // Post-index of lane loads
> - case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
> - case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
> - case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
> - case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
> -
> - case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
> - case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
> - case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
> - case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
> -
> - case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
> - case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
> - case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
> - case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
> -
> - // Post-index of lane stores
> - case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
> - case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
> - case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
> - case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
> -
> - case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
> - case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
> - case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
> - case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
> -
> - case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
> - case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
> - case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
> - case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
> - }
> - return Opc; // If not one we handle, return it unchanged.
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
> - unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
> -
> - EVT VT = N->getValueType(0);
> - unsigned OpcodeIndex;
> - bool is64BitVector = VT.is64BitVector();
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
> - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
> - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
> - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
> - default: llvm_unreachable("unhandled vector load type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SmallVector<SDValue, 2> Ops;
> - unsigned AddrOpIdx = isUpdating ? 1 : 2;
> - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
> -
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(AddrOpIdx + 1);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> -
> - Ops.push_back(N->getOperand(0)); // Push back the Chain
> -
> - SmallVector<EVT, 3> ResTys;
> - // Push back the type of return super register
> - if (NumVecs == 1)
> - ResTys.push_back(VT);
> - else if (NumVecs == 3)
> - ResTys.push_back(MVT::Untyped);
> - else {
> - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
> - is64BitVector ? NumVecs : NumVecs * 2);
> - ResTys.push_back(ResTy);
> - }
> -
> - if (isUpdating)
> - ResTys.push_back(MVT::i64); // Type of the updated register
> - ResTys.push_back(MVT::Other); // Type of the Chain
> - SDLoc dl(N);
> - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> -
> - // Transfer memoperands.
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> - cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
> -
> - if (NumVecs == 1)
> - return VLd;
> -
> - // If NumVecs > 1, the return result is a super register containing 2-4
> - // consecutive vector registers.
> - SDValue SuperReg = SDValue(VLd, 0);
> -
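> - // E.g. an LD2 of two v4i32 vectors comes back as a single v4i64 "super"
> - // value; qsub_0 and qsub_1 are then extracted as the two v4i32 results.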
> - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
> - for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
> - ReplaceUses(SDValue(N, Vec),
> - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
> - // Update users of the Chain
> - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
> - if (isUpdating)
> - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
> -
> - return nullptr;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
> - unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
> - SDLoc dl(N);
> -
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> -
> - unsigned AddrOpIdx = isUpdating ? 1 : 2;
> - unsigned Vec0Idx = 3;
> - EVT VT = N->getOperand(Vec0Idx).getValueType();
> - unsigned OpcodeIndex;
> - bool is64BitVector = VT.is64BitVector();
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
> - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
> - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
> - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
> - default: llvm_unreachable("unhandled vector store type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SmallVector<EVT, 2> ResTys;
> - if (isUpdating)
> - ResTys.push_back(MVT::i64);
> - ResTys.push_back(MVT::Other); // Type for the Chain
> -
> - SmallVector<SDValue, 6> Ops;
> - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
> -
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(AddrOpIdx + 1);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> -
> - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
> - N->op_begin() + Vec0Idx + NumVecs);
> - SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
> - Ops.push_back(SrcReg);
> -
> - // Push back the Chain
> - Ops.push_back(N->getOperand(0));
> -
> - // Transfer memoperands.
> - SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> - cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
> -
> - return VSt;
> -}
> -
> -SDValue
> -AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
> - SDValue Operand) {
> - SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
> - VT, VTD, MVT::Other,
> - CurDAG->getTargetConstant(0, MVT::i64),
> - Operand,
> - CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
> - return SDValue(Reg, 0);
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
> - unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
> - SDLoc dl(N);
> -
> - EVT VT = N->getValueType(0);
> - unsigned OpcodeIndex;
> - bool is64BitVector = VT.is64BitVector();
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
> - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
> - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
> - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
> - default: llvm_unreachable("unhandled vector duplicate lane load type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SDValue SuperReg;
> - SmallVector<SDValue, 6> Ops;
> - Ops.push_back(N->getOperand(1)); // Push back the Memory Address
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(2);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> - Ops.push_back(N->getOperand(0)); // Push back the Chain
> -
> - SmallVector<EVT, 3> ResTys;
> - // Push back the type of return super register
> - if (NumVecs == 3)
> - ResTys.push_back(MVT::Untyped);
> - else {
> - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
> - is64BitVector ? NumVecs : NumVecs * 2);
> - ResTys.push_back(ResTy);
> - }
> - if (isUpdating)
> - ResTys.push_back(MVT::i64); // Type of the updated register
> - ResTys.push_back(MVT::Other); // Type of the Chain
> - SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> -
> - // Transfer memoperands.
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> - cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
> -
> - SuperReg = SDValue(VLdDup, 0);
> - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
> - // Update uses of each register in the super register
> - for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
> - ReplaceUses(SDValue(N, Vec),
> - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
> - // Update uses of the Chain
> - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
> - if (isUpdating)
> - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
> - return nullptr;
> -}
> -
> -// Only 128-bit variants of the load/store lane instructions exist, so 64-bit
> -// vectors are also selected to the 128-bit instructions: SUBREG_TO_REG adapts
> -// the input to a 128-bit vector and EXTRACT_SUBREG recovers the 64-bit vector
> -// from the 128-bit output.
> -SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
> - bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
> - SDLoc dl(N);
> - unsigned AddrOpIdx = isUpdating ? 1 : 2;
> - unsigned Vec0Idx = 3;
> -
> - SDValue Chain = N->getOperand(0);
> - unsigned Lane =
> - cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
> - EVT VT = N->getOperand(Vec0Idx).getValueType();
> - bool is64BitVector = VT.is64BitVector();
> - EVT VT64; // 64-bit Vector Type
> -
> - if (is64BitVector) {
> - VT64 = VT;
> - VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
> - VT.getVectorNumElements() * 2);
> - }
> -
> - unsigned OpcodeIndex;
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = 0; break;
> - case 16: OpcodeIndex = 1; break;
> - case 32: OpcodeIndex = 2; break;
> - case 64: OpcodeIndex = 3; break;
> - default: llvm_unreachable("unhandled vector lane load/store type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SmallVector<EVT, 3> ResTys;
> - if (IsLoad) {
> - // Push back the type of return super register
> - if (NumVecs == 3)
> - ResTys.push_back(MVT::Untyped);
> - else {
> - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
> - is64BitVector ? NumVecs : NumVecs * 2);
> - ResTys.push_back(ResTy);
> - }
> - }
> - if (isUpdating)
> - ResTys.push_back(MVT::i64); // Type of the updated register
> - ResTys.push_back(MVT::Other); // Type of Chain
> - SmallVector<SDValue, 5> Ops;
> - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(AddrOpIdx + 1);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> -
> - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
> - N->op_begin() + Vec0Idx + NumVecs);
> - if (is64BitVector)
> - for (unsigned i = 0; i < Regs.size(); i++)
> - Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
> - SDValue SuperReg = createQTuple(Regs);
> -
> - Ops.push_back(SuperReg); // Source Reg
> - SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
> - Ops.push_back(LaneValue);
> - Ops.push_back(Chain); // Push back the Chain
> -
> - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> - cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
> - if (!IsLoad)
> - return VLdLn;
> -
> - // Extract the subregisters.
> - SuperReg = SDValue(VLdLn, 0);
> - unsigned Sub0 = AArch64::qsub_0;
> - // Update uses of each register in the super register
> - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
> - SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
> - if (is64BitVector) {
> - SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
> - }
> - ReplaceUses(SDValue(N, Vec), SUB0);
> - }
> - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
> - if (isUpdating)
> - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
> - return nullptr;
> -}
> -
> -unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
> - unsigned NumOfVec) {
> - assert(NumOfVec >= 1 && NumOfVec <= 4 && "TBL NumVecs out-of-range");
> -
> - unsigned Opc = 0;
> - switch (NumOfVec) {
> - default:
> - break;
> - case 1:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
> - break;
> - case 2:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
> - break;
> - case 3:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
> - break;
> - case 4:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
> - break;
> - }
> -
> - return Opc;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
> - bool IsExt) {
> - assert(NumVecs >= 1 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
> - SDLoc dl(N);
> -
> - // Check that the elements of the lookup table are 128-bit vectors.
> - unsigned Vec0Idx = IsExt ? 2 : 1;
> - assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
> - "The element of lookup table for vtbl and vtbx must be 128-bit");
> -
> - // Check whether the return value type is 64-bit.
> - EVT ResVT = N->getValueType(0);
> - bool is64BitRes = ResVT.is64BitVector();
> -
> - // Create new SDValue for vector list
> - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
> - N->op_begin() + Vec0Idx + NumVecs);
> - SDValue TblReg = createQTuple(Regs);
> - unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
> -
> - SmallVector<SDValue, 3> Ops;
> - if (IsExt)
> - Ops.push_back(N->getOperand(1));
> - Ops.push_back(TblReg);
> - Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
> - return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
> -}
> -
> -SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
> - // Dump information about the Node being selected
> - DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
> -
> - if (Node->isMachineOpcode()) {
> - DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
> - Node->setNodeId(-1);
> - return nullptr;
> - }
> -
> - switch (Node->getOpcode()) {
> - case ISD::ATOMIC_LOAD_ADD:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_ADD_I8,
> - AArch64::ATOMIC_LOAD_ADD_I16,
> - AArch64::ATOMIC_LOAD_ADD_I32,
> - AArch64::ATOMIC_LOAD_ADD_I64);
> - case ISD::ATOMIC_LOAD_SUB:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_SUB_I8,
> - AArch64::ATOMIC_LOAD_SUB_I16,
> - AArch64::ATOMIC_LOAD_SUB_I32,
> - AArch64::ATOMIC_LOAD_SUB_I64);
> - case ISD::ATOMIC_LOAD_AND:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_AND_I8,
> - AArch64::ATOMIC_LOAD_AND_I16,
> - AArch64::ATOMIC_LOAD_AND_I32,
> - AArch64::ATOMIC_LOAD_AND_I64);
> - case ISD::ATOMIC_LOAD_OR:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_OR_I8,
> - AArch64::ATOMIC_LOAD_OR_I16,
> - AArch64::ATOMIC_LOAD_OR_I32,
> - AArch64::ATOMIC_LOAD_OR_I64);
> - case ISD::ATOMIC_LOAD_XOR:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_XOR_I8,
> - AArch64::ATOMIC_LOAD_XOR_I16,
> - AArch64::ATOMIC_LOAD_XOR_I32,
> - AArch64::ATOMIC_LOAD_XOR_I64);
> - case ISD::ATOMIC_LOAD_NAND:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_NAND_I8,
> - AArch64::ATOMIC_LOAD_NAND_I16,
> - AArch64::ATOMIC_LOAD_NAND_I32,
> - AArch64::ATOMIC_LOAD_NAND_I64);
> - case ISD::ATOMIC_LOAD_MIN:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_MIN_I8,
> - AArch64::ATOMIC_LOAD_MIN_I16,
> - AArch64::ATOMIC_LOAD_MIN_I32,
> - AArch64::ATOMIC_LOAD_MIN_I64);
> - case ISD::ATOMIC_LOAD_MAX:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_MAX_I8,
> - AArch64::ATOMIC_LOAD_MAX_I16,
> - AArch64::ATOMIC_LOAD_MAX_I32,
> - AArch64::ATOMIC_LOAD_MAX_I64);
> - case ISD::ATOMIC_LOAD_UMIN:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_UMIN_I8,
> - AArch64::ATOMIC_LOAD_UMIN_I16,
> - AArch64::ATOMIC_LOAD_UMIN_I32,
> - AArch64::ATOMIC_LOAD_UMIN_I64);
> - case ISD::ATOMIC_LOAD_UMAX:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_UMAX_I8,
> - AArch64::ATOMIC_LOAD_UMAX_I16,
> - AArch64::ATOMIC_LOAD_UMAX_I32,
> - AArch64::ATOMIC_LOAD_UMAX_I64);
> - case ISD::ATOMIC_SWAP:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_SWAP_I8,
> - AArch64::ATOMIC_SWAP_I16,
> - AArch64::ATOMIC_SWAP_I32,
> - AArch64::ATOMIC_SWAP_I64);
> - case ISD::ATOMIC_CMP_SWAP:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_CMP_SWAP_I8,
> - AArch64::ATOMIC_CMP_SWAP_I16,
> - AArch64::ATOMIC_CMP_SWAP_I32,
> - AArch64::ATOMIC_CMP_SWAP_I64);
> - case ISD::FrameIndex: {
> - int FI = cast<FrameIndexSDNode>(Node)->getIndex();
> - EVT PtrTy = getTargetLowering()->getPointerTy();
> - SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
> - return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
> - TFI, CurDAG->getTargetConstant(0, PtrTy));
> - }
> - case ISD::Constant: {
> - SDNode *ResNode = nullptr;
> - if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
> - // XZR and WZR are probably even better than an actual move: most of the
> - // time they can be folded into another instruction with *no* cost.
> -
> - EVT Ty = Node->getValueType(0);
> - assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
> - uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
> - ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
> - SDLoc(Node),
> - Register, Ty).getNode();
> - }
> -
> - // Next best option is a move-immediate, see if we can do that.
> - if (!ResNode) {
> - ResNode = TrySelectToMoveImm(Node);
> - }
> -
> - if (ResNode)
> - return ResNode;
> -
> - // If even that fails, we fall back to a lit-pool entry for now. Future
> - // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
> - ResNode = SelectToLitPool(Node);
> - assert(ResNode && "We need *some* way to materialise a constant");
> -
> - // We want to continue selection at this point since the litpool access
> - // we just generated uses generic nodes for simplicity.
> - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
> - Node = ResNode;
> - break;
> - }
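
The ISD::Constant case above is a three-step materialisation cascade; a
minimal standalone restatement (the enum and names are mine, purely
illustrative):

  // Order of preference when materialising an integer constant, as above:
  // a zero register, then a move-immediate, then a literal-pool load.
  enum class ConstStrategy { ZeroReg, MoveImm, LitPool };
  ConstStrategy pickConstStrategy(unsigned long long Val, bool MoveImmFits) {
    if (Val == 0)
      return ConstStrategy::ZeroReg; // WZR/XZR usually folds for free
    if (MoveImmFits)
      return ConstStrategy::MoveImm; // a single move-immediate instruction
    return ConstStrategy::LitPool;   // load the value from a constant pool
  }
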
> - case ISD::ConstantFP: {
> - if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
> - // FMOV will take care of it from TableGen
> - break;
> - }
> -
> - SDNode *ResNode = LowerToFPLitPool(Node);
> - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
> -
> - // We want to continue selection at this point since the litpool access
> - // we just generated uses generic nodes for simplicity.
> - Node = ResNode;
> - break;
> - }
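
A64Imms::isFPImm itself isn't in this hunk, but for context: FMOV's 8-bit
immediate can only encode +/-(16+m)/16 * 2^e with m in [0,15] and e in
[-3,4]. A self-contained sketch of that check (my reconstruction, not the
removed helper):

  #include <cmath>
  // True if V fits FMOV's (scalar, immediate) encoding; anything else falls
  // through to the literal-pool path above.
  bool isFMOVImmediate(double V) {
    if (V == 0.0 || !std::isfinite(V))
      return false;
    double A = std::fabs(V);
    for (int E = -3; E <= 4; ++E)
      for (int M = 0; M <= 15; ++M)
        if (A == (16.0 + M) / 16.0 * std::ldexp(1.0, E))
          return true;
    return false;
  }
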
> - case AArch64ISD::NEON_LD1_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
> - AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
> - AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
> - AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 1, Opcodes);
> - }
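
All of the 8-entry opcode tables in this switch share one layout; going by
the naming, the implied slot order is as follows (an assumption on my part,
since SelectVLD's body lives elsewhere in the diff):

  // Four 64-bit vector forms first, then their 128-bit counterparts.
  enum VecForm { V8B, V4H, V2S, V1D, V16B, V8H, V4S, V2D };
  unsigned short opcodeFor(const unsigned short Opcodes[8], VecForm Form) {
    return Opcodes[Form]; // e.g. Opcodes[V4S] yields the ..._4S_... opcode
  }
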
> - case AArch64ISD::NEON_LD2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
> - AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
> - AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
> - AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
> - AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
> - AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
> - AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
> - AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
> - AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
> - AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD1x2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed,
> - AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
> - AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
> - AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD1x3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed,
> - AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
> - AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
> - AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD1x4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed,
> - AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
> - AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
> - AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
> - AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
> - AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
> - AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
> - };
> - return SelectVST(Node, true, 1, Opcodes);
> - }
> - case AArch64ISD::NEON_ST2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
> - AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
> - AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
> - AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
> - };
> - return SelectVST(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_ST3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
> - AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
> - AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
> - AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
> - };
> - return SelectVST(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_ST4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
> - AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
> - AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
> - AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
> - };
> - return SelectVST(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD2DUP: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
> - AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
> - AArch64::LD2R_4S, AArch64::LD2R_2D
> - };
> - return SelectVLDDup(Node, false, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3DUP: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
> - AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
> - AArch64::LD3R_4S, AArch64::LD3R_2D
> - };
> - return SelectVLDDup(Node, false, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4DUP: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
> - AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
> - AArch64::LD4R_4S, AArch64::LD4R_2D
> - };
> - return SelectVLDDup(Node, false, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD2DUP_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed,
> - AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed,
> - AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
> - AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed
> - };
> - return SelectVLDDup(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3DUP_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed,
> - AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed,
> - AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
> - AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed
> - };
> - return SelectVLDDup(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4DUP_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed,
> - AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed,
> - AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
> - AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed
> - };
> - return SelectVLDDup(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD2LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
> - AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, true, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
> - AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, true, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
> - AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, true, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_ST2LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
> - AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, false, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_ST3LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
> - AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, false, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_ST4LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
> - AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, false, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1x2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed,
> - AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
> - AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
> - AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
> - };
> - return SelectVST(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1x3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed,
> - AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
> - AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
> - AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
> - };
> - return SelectVST(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1x4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed,
> - AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
> - AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
> - AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
> - };
> - return SelectVST(Node, true, 4, Opcodes);
> - }
> - case ISD::INTRINSIC_WO_CHAIN: {
> - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
> - bool IsExt = false;
> - switch (IntNo) {
> - default:
> - break;
> - case Intrinsic::aarch64_neon_vtbx1:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl1:
> - return SelectVTBL(Node, 1, IsExt);
> - case Intrinsic::aarch64_neon_vtbx2:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl2:
> - return SelectVTBL(Node, 2, IsExt);
> - case Intrinsic::aarch64_neon_vtbx3:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl3:
> - return SelectVTBL(Node, 3, IsExt);
> - case Intrinsic::aarch64_neon_vtbx4:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl4:
> - return SelectVTBL(Node, 4, IsExt);
> - }
> - break;
> - }
> - case ISD::INTRINSIC_VOID:
> - case ISD::INTRINSIC_W_CHAIN: {
> - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
> - switch (IntNo) {
> - default:
> - break;
> - case Intrinsic::arm_neon_vld1: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
> - AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
> - };
> - return SelectVLD(Node, false, 1, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
> - AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
> - };
> - return SelectVLD(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
> - AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
> - };
> - return SelectVLD(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
> - AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
> - };
> - return SelectVLD(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vld1x2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
> - AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
> - AArch64::LD1x2_4S, AArch64::LD1x2_2D
> - };
> - return SelectVLD(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vld1x3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
> - AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
> - AArch64::LD1x3_4S, AArch64::LD1x3_2D
> - };
> - return SelectVLD(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vld1x4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
> - AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
> - AArch64::LD1x4_4S, AArch64::LD1x4_2D
> - };
> - return SelectVLD(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst1: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
> - AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
> - };
> - return SelectVST(Node, false, 1, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
> - AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
> - };
> - return SelectVST(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
> - AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
> - };
> - return SelectVST(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
> - AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
> - };
> - return SelectVST(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vst1x2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
> - AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
> - AArch64::ST1x2_4S, AArch64::ST1x2_2D
> - };
> - return SelectVST(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vst1x3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
> - AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
> - AArch64::ST1x3_4S, AArch64::ST1x3_2D
> - };
> - return SelectVST(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vst1x4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
> - AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
> - AArch64::ST1x4_4S, AArch64::ST1x4_2D
> - };
> - return SelectVST(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld2lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
> - };
> - return SelectVLDSTLane(Node, true, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld3lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
> - };
> - return SelectVLDSTLane(Node, true, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld4lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
> - };
> - return SelectVLDSTLane(Node, true, false, 4, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst2lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
> - };
> - return SelectVLDSTLane(Node, false, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst3lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
> - };
> - return SelectVLDSTLane(Node, false, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst4lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
> - };
> - return SelectVLDSTLane(Node, false, false, 4, Opcodes);
> - }
> - } // End of switch IntNo
> - break;
> - } // End of case ISD::INTRINSIC_VOID and ISD::INTRINSIC_W_CHAIN
> - default:
> - break; // Let generic code handle it
> - }
> -
> - SDNode *ResNode = SelectCode(Node);
> -
> - DEBUG(dbgs() << "=> ";
> - if (ResNode == nullptr || ResNode == Node)
> - Node->dump(CurDAG);
> - else
> - ResNode->dump(CurDAG);
> - dbgs() << "\n");
> -
> - return ResNode;
> -}
> -
> -/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
> -/// instruction scheduling.
> -FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
> - CodeGenOpt::Level OptLevel) {
> - return new AArch64DAGToDAGISel(TM, OptLevel);
> -}
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (removed)
> @@ -1,5564 +0,0 @@
> -//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines the interfaces that AArch64 uses to lower LLVM code into a
> -// selection DAG.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64ISelLowering.h"
> -#include "AArch64MachineFunctionInfo.h"
> -#include "AArch64Subtarget.h"
> -#include "AArch64TargetMachine.h"
> -#include "AArch64TargetObjectFile.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/CodeGen/Analysis.h"
> -#include "llvm/CodeGen/CallingConvLower.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
> -#include "llvm/IR/CallingConv.h"
> -#include "llvm/Support/MathExtras.h"
> -
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "aarch64-isel"
> -
> -static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
> - assert(TM.getSubtarget<AArch64Subtarget>().isTargetELF() &&
> - "unknown subtarget type");
> - return new AArch64ElfTargetObjectFile();
> -}
> -
> -AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
> - : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
> -
> - const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
> -
> - // SIMD compares set the entire lane's bits to 1
> - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
> -
> - // Scalar register <-> type mapping
> - addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
> - addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
> -
> - if (Subtarget->hasFPARMv8()) {
> - addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
> - addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
> - addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
> - }
> -
> - if (Subtarget->hasNEON()) {
> - // And the vectors
> - addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
> - addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
> - addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
> - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
> - }
> -
> - computeRegisterProperties();
> -
> - // We combine OR nodes for bitfield and NEON BSL operations.
> - setTargetDAGCombine(ISD::OR);
> -
> - setTargetDAGCombine(ISD::AND);
> - setTargetDAGCombine(ISD::SRA);
> - setTargetDAGCombine(ISD::SRL);
> - setTargetDAGCombine(ISD::SHL);
> -
> - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
> - setTargetDAGCombine(ISD::INTRINSIC_VOID);
> - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
> -
> - // AArch64 does not have i1 loads, or much of anything for i1 really.
> - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
> - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
> - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
> -
> - setStackPointerRegisterToSaveRestore(AArch64::XSP);
> - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
> - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
> - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
> -
> - // We'll lower globals to wrappers for selection.
> - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
> -
> - // A64 instructions have the comparison predicate attached to the user of the
> - // result, but having a separate comparison is valuable for matching.
> - setOperationAction(ISD::BR_CC, MVT::i32, Custom);
> - setOperationAction(ISD::BR_CC, MVT::i64, Custom);
> - setOperationAction(ISD::BR_CC, MVT::f32, Custom);
> - setOperationAction(ISD::BR_CC, MVT::f64, Custom);
> -
> - setOperationAction(ISD::SELECT, MVT::i32, Custom);
> - setOperationAction(ISD::SELECT, MVT::i64, Custom);
> - setOperationAction(ISD::SELECT, MVT::f32, Custom);
> - setOperationAction(ISD::SELECT, MVT::f64, Custom);
> -
> - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
> -
> - setOperationAction(ISD::BRCOND, MVT::Other, Custom);
> -
> - setOperationAction(ISD::SETCC, MVT::i32, Custom);
> - setOperationAction(ISD::SETCC, MVT::i64, Custom);
> - setOperationAction(ISD::SETCC, MVT::f32, Custom);
> - setOperationAction(ISD::SETCC, MVT::f64, Custom);
> -
> - setOperationAction(ISD::BR_JT, MVT::Other, Expand);
> - setOperationAction(ISD::JumpTable, MVT::i32, Custom);
> - setOperationAction(ISD::JumpTable, MVT::i64, Custom);
> -
> - setOperationAction(ISD::VASTART, MVT::Other, Custom);
> - setOperationAction(ISD::VACOPY, MVT::Other, Custom);
> - setOperationAction(ISD::VAEND, MVT::Other, Expand);
> - setOperationAction(ISD::VAARG, MVT::Other, Expand);
> -
> - setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
> - setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
> -
> - setOperationAction(ISD::ROTL, MVT::i32, Expand);
> - setOperationAction(ISD::ROTL, MVT::i64, Expand);
> -
> - setOperationAction(ISD::UREM, MVT::i32, Expand);
> - setOperationAction(ISD::UREM, MVT::i64, Expand);
> - setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
> - setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
> -
> - setOperationAction(ISD::SREM, MVT::i32, Expand);
> - setOperationAction(ISD::SREM, MVT::i64, Expand);
> - setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
> - setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
> -
> - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
> - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
> - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
> - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
> -
> - setOperationAction(ISD::CTPOP, MVT::i32, Expand);
> - setOperationAction(ISD::CTPOP, MVT::i64, Expand);
> -
> - // Legal floating-point operations.
> - setOperationAction(ISD::FABS, MVT::f32, Legal);
> - setOperationAction(ISD::FABS, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FCEIL, MVT::f32, Legal);
> - setOperationAction(ISD::FCEIL, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FNEG, MVT::f32, Legal);
> - setOperationAction(ISD::FNEG, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FRINT, MVT::f32, Legal);
> - setOperationAction(ISD::FRINT, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FSQRT, MVT::f32, Legal);
> - setOperationAction(ISD::FSQRT, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
> -
> - setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
> - setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
> - setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
> -
> - // Illegal floating-point operations.
> - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
> - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FCOS, MVT::f32, Expand);
> - setOperationAction(ISD::FCOS, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FEXP, MVT::f32, Expand);
> - setOperationAction(ISD::FEXP, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FEXP2, MVT::f32, Expand);
> - setOperationAction(ISD::FEXP2, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FLOG, MVT::f32, Expand);
> - setOperationAction(ISD::FLOG, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FLOG2, MVT::f32, Expand);
> - setOperationAction(ISD::FLOG2, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FLOG10, MVT::f32, Expand);
> - setOperationAction(ISD::FLOG10, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FPOW, MVT::f32, Expand);
> - setOperationAction(ISD::FPOW, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FPOWI, MVT::f32, Expand);
> - setOperationAction(ISD::FPOWI, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FREM, MVT::f32, Expand);
> - setOperationAction(ISD::FREM, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FSIN, MVT::f32, Expand);
> - setOperationAction(ISD::FSIN, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
> - setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
> -
> - // Virtually no operations on f128 are legal, but LLVM can't expand them when
> - // there's a valid register class, so we need custom lowering in most cases.
> - setOperationAction(ISD::FABS, MVT::f128, Expand);
> - setOperationAction(ISD::FADD, MVT::f128, Custom);
> - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
> - setOperationAction(ISD::FCOS, MVT::f128, Expand);
> - setOperationAction(ISD::FDIV, MVT::f128, Custom);
> - setOperationAction(ISD::FMA, MVT::f128, Expand);
> - setOperationAction(ISD::FMUL, MVT::f128, Custom);
> - setOperationAction(ISD::FNEG, MVT::f128, Expand);
> - setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
> - setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
> - setOperationAction(ISD::FPOW, MVT::f128, Expand);
> - setOperationAction(ISD::FREM, MVT::f128, Expand);
> - setOperationAction(ISD::FRINT, MVT::f128, Expand);
> - setOperationAction(ISD::FSIN, MVT::f128, Expand);
> - setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
> - setOperationAction(ISD::FSQRT, MVT::f128, Expand);
> - setOperationAction(ISD::FSUB, MVT::f128, Custom);
> - setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
> - setOperationAction(ISD::SETCC, MVT::f128, Custom);
> - setOperationAction(ISD::BR_CC, MVT::f128, Custom);
> - setOperationAction(ISD::SELECT, MVT::f128, Expand);
> - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
> - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
> -
> - // Lowering for many of the conversions is actually specified by the non-f128
> - // type. The LowerXXX function will be trivial when f128 isn't involved.
> - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
> - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
> - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
> -
> - // i128 shift operation support
> - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
> - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
> - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
> -
> - // This prevents LLVM from trying to compress double constants into a
> - // float constant-pool entry and then loading from there. It's of doubtful
> - // benefit for A64: we'd need an LDR followed by an FCVT, I believe.
> - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
> - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
> - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
> -
> - setTruncStoreAction(MVT::f128, MVT::f64, Expand);
> - setTruncStoreAction(MVT::f128, MVT::f32, Expand);
> - setTruncStoreAction(MVT::f128, MVT::f16, Expand);
> - setTruncStoreAction(MVT::f64, MVT::f32, Expand);
> - setTruncStoreAction(MVT::f64, MVT::f16, Expand);
> - setTruncStoreAction(MVT::f32, MVT::f16, Expand);
> -
> - setExceptionPointerRegister(AArch64::X0);
> - setExceptionSelectorRegister(AArch64::X1);
> -
> - if (Subtarget->hasNEON()) {
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
> -
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
> -
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
> -
> - setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
> - setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
> - setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
> - setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
> -
> - setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
> - setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
> - setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
> - setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
> - setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
> - setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
> - setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
> - setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
> - setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
> - setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::SINT_TO_FP, MVT::v1i8, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v1i16, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v1i32, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
> -
> - setOperationAction(ISD::UINT_TO_FP, MVT::v1i8, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v1i16, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v1i32, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
> -
> - setOperationAction(ISD::FP_TO_SINT, MVT::v1i8, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v1i16, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v1i32, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Custom);
> -
> - setOperationAction(ISD::FP_TO_UINT, MVT::v1i8, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v1i16, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v1i32, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Custom);
> -
> - // Neon does not support vector divide/remainder operations except
> - // floating-point divide.
> - setOperationAction(ISD::SDIV, MVT::v1i8, Expand);
> - setOperationAction(ISD::SDIV, MVT::v8i8, Expand);
> - setOperationAction(ISD::SDIV, MVT::v16i8, Expand);
> - setOperationAction(ISD::SDIV, MVT::v1i16, Expand);
> - setOperationAction(ISD::SDIV, MVT::v4i16, Expand);
> - setOperationAction(ISD::SDIV, MVT::v8i16, Expand);
> - setOperationAction(ISD::SDIV, MVT::v1i32, Expand);
> - setOperationAction(ISD::SDIV, MVT::v2i32, Expand);
> - setOperationAction(ISD::SDIV, MVT::v4i32, Expand);
> - setOperationAction(ISD::SDIV, MVT::v1i64, Expand);
> - setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::UDIV, MVT::v1i8, Expand);
> - setOperationAction(ISD::UDIV, MVT::v8i8, Expand);
> - setOperationAction(ISD::UDIV, MVT::v16i8, Expand);
> - setOperationAction(ISD::UDIV, MVT::v1i16, Expand);
> - setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
> - setOperationAction(ISD::UDIV, MVT::v8i16, Expand);
> - setOperationAction(ISD::UDIV, MVT::v1i32, Expand);
> - setOperationAction(ISD::UDIV, MVT::v2i32, Expand);
> - setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
> - setOperationAction(ISD::UDIV, MVT::v1i64, Expand);
> - setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::SREM, MVT::v1i8, Expand);
> - setOperationAction(ISD::SREM, MVT::v8i8, Expand);
> - setOperationAction(ISD::SREM, MVT::v16i8, Expand);
> - setOperationAction(ISD::SREM, MVT::v1i16, Expand);
> - setOperationAction(ISD::SREM, MVT::v4i16, Expand);
> - setOperationAction(ISD::SREM, MVT::v8i16, Expand);
> - setOperationAction(ISD::SREM, MVT::v1i32, Expand);
> - setOperationAction(ISD::SREM, MVT::v2i32, Expand);
> - setOperationAction(ISD::SREM, MVT::v4i32, Expand);
> - setOperationAction(ISD::SREM, MVT::v1i64, Expand);
> - setOperationAction(ISD::SREM, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::UREM, MVT::v1i8, Expand);
> - setOperationAction(ISD::UREM, MVT::v8i8, Expand);
> - setOperationAction(ISD::UREM, MVT::v16i8, Expand);
> - setOperationAction(ISD::UREM, MVT::v1i16, Expand);
> - setOperationAction(ISD::UREM, MVT::v4i16, Expand);
> - setOperationAction(ISD::UREM, MVT::v8i16, Expand);
> - setOperationAction(ISD::UREM, MVT::v1i32, Expand);
> - setOperationAction(ISD::UREM, MVT::v2i32, Expand);
> - setOperationAction(ISD::UREM, MVT::v4i32, Expand);
> - setOperationAction(ISD::UREM, MVT::v1i64, Expand);
> - setOperationAction(ISD::UREM, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::FREM, MVT::v2f32, Expand);
> - setOperationAction(ISD::FREM, MVT::v4f32, Expand);
> - setOperationAction(ISD::FREM, MVT::v1f64, Expand);
> - setOperationAction(ISD::FREM, MVT::v2f64, Expand);
> -
> - setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
> - setOperationAction(ISD::SELECT, MVT::v16i8, Expand);
> - setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
> - setOperationAction(ISD::SELECT, MVT::v8i16, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2i64, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2f64, Expand);
> -
> - setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom);
> -
> - // Vector ExtLoad and TruncStore are expanded.
> - for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE;
> - I <= MVT::LAST_VECTOR_VALUETYPE; ++I) {
> - MVT VT = (MVT::SimpleValueType) I;
> - setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
> - setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
> - setLoadExtAction(ISD::EXTLOAD, VT, Expand);
> - for (unsigned II = MVT::FIRST_VECTOR_VALUETYPE;
> - II <= MVT::LAST_VECTOR_VALUETYPE; ++II) {
> - MVT VT1 = (MVT::SimpleValueType) II;
> - // A vector TruncStore pairs two vector types with the same number of
> - // elements but a narrower element size on the stored side.
> - if (VT.getVectorNumElements() == VT1.getVectorNumElements() &&
> - VT.getVectorElementType().getSizeInBits()
> - > VT1.getVectorElementType().getSizeInBits())
> - setTruncStoreAction(VT, VT1, Expand);
> - }
> -
> - setOperationAction(ISD::MULHS, VT, Expand);
> - setOperationAction(ISD::SMUL_LOHI, VT, Expand);
> - setOperationAction(ISD::MULHU, VT, Expand);
> - setOperationAction(ISD::UMUL_LOHI, VT, Expand);
> -
> - setOperationAction(ISD::BSWAP, VT, Expand);
> - }
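
The inner condition is easy to misread, so here it is restated standalone
(the struct is mine, illustrative only):

  // A vector truncstore pairs types with the same element count and a
  // strictly narrower stored element -- e.g. a v4i32 stored as v4i16.
  struct VecTy { unsigned NumElts, EltBits; };
  bool isVectorTruncStorePair(VecTy Src, VecTy Dst) {
    return Src.NumElts == Dst.NumElts && Src.EltBits > Dst.EltBits;
  }
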
> -
> - // There is no v1i64/v2i64 multiply, so expand v1i64/v2i64 to a GPR i64
> - // multiply.
> - // FIXME: For a v2i64 multiply, we copy VPR to GPR, do 2 i64 multiplies,
> - // and then copy back to VPR. This could instead be done with the following
> - // 3 NEON instructions:
> - // pmull v2.1q, v0.1d, v1.1d
> - // pmull2 v3.1q, v0.2d, v1.2d
> - // ins v2.d[1], v3.d[0]
> - // Since we can't currently verify that this transformation is correct, we
> - // leave the optimization for the future.
> - setOperationAction(ISD::MUL, MVT::v1i64, Expand);
> - setOperationAction(ISD::MUL, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
> - setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
> - setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
> - setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
> - setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
> - setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
> - setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
> - setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
> - setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
> - }
> -
> - setTargetDAGCombine(ISD::SIGN_EXTEND);
> - setTargetDAGCombine(ISD::VSELECT);
> -
> - MaskAndBranchFoldingIsLegal = true;
> -}
> -
> -EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
> - // It's reasonably important that this value matches the "natural" legal
> - // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
> - // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
> - if (!VT.isVector()) return MVT::i32;
> - return VT.changeVectorElementTypeToInteger();
> -}
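
Concretely, that rule sends scalar compares to i32 and keeps a vector's
shape while switching its lanes to integers; a small sketch (the types are
mine):

  // setcc result types: scalar -> i32; vector -> same element count with
  // integer lanes of the same width (v2f64 -> v2i64, v4f32 -> v4i32).
  struct SimpleVT { bool IsVector; unsigned NumElts, EltBits; bool IsFP; };
  SimpleVT setCCResultTypeSketch(SimpleVT VT) {
    if (!VT.IsVector)
      return SimpleVT{false, 1, 32, false}; // plain i32
    VT.IsFP = false; // element count and width are unchanged
    return VT;
  }
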
> -
> -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
> - unsigned &LdrOpc,
> - unsigned &StrOpc) {
> - static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
> - AArch64::LDXR_word, AArch64::LDXR_dword};
> - static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
> - AArch64::LDAXR_word, AArch64::LDAXR_dword};
> - static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
> - AArch64::STXR_word, AArch64::STXR_dword};
> - static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword,
> - AArch64::STLXR_word, AArch64::STLXR_dword};
> -
> - const unsigned *LoadOps, *StoreOps;
> - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
> - LoadOps = LoadAcqs;
> - else
> - LoadOps = LoadBares;
> -
> - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
> - StoreOps = StoreRels;
> - else
> - StoreOps = StoreBares;
> -
> - assert(isPowerOf2_32(Size) && Size <= 8 &&
> - "unsupported size for atomic binary op!");
> -
> - LdrOpc = LoadOps[Log2_32(Size)];
> - StrOpc = StoreOps[Log2_32(Size)];
> -}
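
The ordering split above is the usual one for exclusive pairs; restated
standalone (the enum names are mine, mirroring the orderings used above):

  // Acquire semantics go on the exclusive load (LDAXR), release semantics
  // on the exclusive store (STLXR); acq_rel and seq_cst need both,
  // monotonic needs neither.
  enum Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };
  struct ExclPair { bool AcquireLoad, ReleaseStore; };
  ExclPair classifyExclusive(Ordering Ord) {
    ExclPair P;
    P.AcquireLoad  = Ord == Acquire || Ord == AcquireRelease || Ord == SeqCst;
    P.ReleaseStore = Ord == Release || Ord == AcquireRelease || Ord == SeqCst;
    return P;
  }
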
> -
> -// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really
> -// have a value type mapped; both are defined as MVT::untyped. Without
> -// knowing the MVT, MachineLICM::getRegisterClassIDAndCost fails to compute
> -// the register pressure correctly.
> -std::pair<const TargetRegisterClass*, uint8_t>
> -AArch64TargetLowering::findRepresentativeClass(MVT VT) const{
> - const TargetRegisterClass *RRC = nullptr;
> - uint8_t Cost = 1;
> - switch (VT.SimpleTy) {
> - default:
> - return TargetLowering::findRepresentativeClass(VT);
> - case MVT::v4i64:
> - RRC = &AArch64::QPairRegClass;
> - Cost = 2;
> - break;
> - case MVT::v8i64:
> - RRC = &AArch64::QQuadRegClass;
> - Cost = 4;
> - break;
> - }
> - return std::make_pair(RRC, Cost);
> -}
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned Size,
> - unsigned BinOpcode) const {
> - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> -
> - const BasicBlock *LLVM_BB = BB->getBasicBlock();
> - MachineFunction *MF = BB->getParent();
> - MachineFunction::iterator It = BB;
> - ++It;
> -
> - unsigned dest = MI->getOperand(0).getReg();
> - unsigned ptr = MI->getOperand(1).getReg();
> - unsigned incr = MI->getOperand(2).getReg();
> - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
> - DebugLoc dl = MI->getDebugLoc();
> -
> - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> -
> - unsigned ldrOpc, strOpc;
> - getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
> -
> - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, loopMBB);
> - MF->insert(It, exitMBB);
> -
> - // Transfer the remainder of BB and its successor edges to exitMBB.
> - exitMBB->splice(exitMBB->begin(), BB,
> - std::next(MachineBasicBlock::iterator(MI)), BB->end());
> - exitMBB->transferSuccessorsAndUpdatePHIs(BB);
> -
> - const TargetRegisterClass *TRC
> - = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
> - unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
> -
> - // thisMBB:
> - // ...
> - // fallthrough --> loopMBB
> - BB->addSuccessor(loopMBB);
> -
> - // loopMBB:
> - // ldxr dest, ptr
> - // <binop> scratch, dest, incr
> - // stxr stxr_status, scratch, ptr
> - // cbnz stxr_status, loopMBB
> - // fallthrough --> exitMBB
> - BB = loopMBB;
> - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
> - if (BinOpcode) {
> - // All arithmetic operations we'll be creating are designed to take an extra
> - // shift or extend operand, which we can conveniently set to zero.
> -
> - // Operand order needs to go the other way for NAND.
> - if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
> - BuildMI(BB, dl, TII->get(BinOpcode), scratch)
> - .addReg(incr).addReg(dest).addImm(0);
> - else
> - BuildMI(BB, dl, TII->get(BinOpcode), scratch)
> - .addReg(dest).addReg(incr).addImm(0);
> - }
> -
> - // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
> - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
> - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
> -
> - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
> - BuildMI(BB, dl, TII->get(AArch64::CBNZw))
> - .addReg(stxr_status).addMBB(loopMBB);
> -
> - BB->addSuccessor(loopMBB);
> - BB->addSuccessor(exitMBB);
> -
> - // exitMBB:
> - // ...
> - BB = exitMBB;
> -
> - MI->eraseFromParent(); // The instruction is gone now.
> -
> - return BB;
> -}
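
The block structure built above is the classic load-exclusive /
store-exclusive retry loop. A conceptual C++ analogue, with a
compare-exchange standing in for the LDXR/STXR pair (illustrative only):

  #include <atomic>
  // For, say, an atomic add: retry until the exclusive store succeeds, and
  // yield the value seen before the update (the 'dest' register above).
  unsigned atomicFetchAddSketch(std::atomic<unsigned> &Ptr, unsigned Incr) {
    unsigned Dest = Ptr.load(std::memory_order_relaxed);   // ldxr
    while (!Ptr.compare_exchange_weak(Dest, Dest + Incr,   // stxr
                                      std::memory_order_relaxed))
      ;                                                    // cbnz -> retry
    return Dest;
  }
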
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
> - MachineBasicBlock *BB,
> - unsigned Size,
> - unsigned CmpOp,
> - A64CC::CondCodes Cond) const {
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> -
> - const BasicBlock *LLVM_BB = BB->getBasicBlock();
> - MachineFunction *MF = BB->getParent();
> - MachineFunction::iterator It = BB;
> - ++It;
> -
> - unsigned dest = MI->getOperand(0).getReg();
> - unsigned ptr = MI->getOperand(1).getReg();
> - unsigned incr = MI->getOperand(2).getReg();
> - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
> -
> - unsigned oldval = dest;
> - DebugLoc dl = MI->getDebugLoc();
> -
> - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> - const TargetRegisterClass *TRC, *TRCsp;
> - if (Size == 8) {
> - TRC = &AArch64::GPR64RegClass;
> - TRCsp = &AArch64::GPR64xspRegClass;
> - } else {
> - TRC = &AArch64::GPR32RegClass;
> - TRCsp = &AArch64::GPR32wspRegClass;
> - }
> -
> - unsigned ldrOpc, strOpc;
> - getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
> -
> - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, loopMBB);
> - MF->insert(It, exitMBB);
> -
> - // Transfer the remainder of BB and its successor edges to exitMBB.
> - exitMBB->splice(exitMBB->begin(), BB,
> - std::next(MachineBasicBlock::iterator(MI)), BB->end());
> - exitMBB->transferSuccessorsAndUpdatePHIs(BB);
> -
> - unsigned scratch = MRI.createVirtualRegister(TRC);
> - MRI.constrainRegClass(scratch, TRCsp);
> -
> - // thisMBB:
> - // ...
> - // fallthrough --> loopMBB
> - BB->addSuccessor(loopMBB);
> -
> - // loopMBB:
> - // ldxr dest, ptr
> - // cmp incr, dest (, sign extend if necessary)
> - // csel scratch, dest, incr, cond
> - // stxr stxr_status, scratch, ptr
> - // cbnz stxr_status, loopMBB
> - // fallthrough --> exitMBB
> - BB = loopMBB;
> - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
> -
> - // Build compare and cmov instructions.
> - MRI.constrainRegClass(incr, TRCsp);
> - BuildMI(BB, dl, TII->get(CmpOp))
> - .addReg(incr).addReg(oldval).addImm(0);
> -
> - BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
> - scratch)
> - .addReg(oldval).addReg(incr).addImm(Cond);
> -
> - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
> - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
> -
> - BuildMI(BB, dl, TII->get(strOpc), stxr_status)
> - .addReg(scratch).addReg(ptr);
> - BuildMI(BB, dl, TII->get(AArch64::CBNZw))
> - .addReg(stxr_status).addMBB(loopMBB);
> -
> - BB->addSuccessor(loopMBB);
> - BB->addSuccessor(exitMBB);
> -
> - // exitMBB:
> - // ...
> - BB = exitMBB;
> -
> - MI->eraseFromParent(); // The instruction is gone now.
> -
> - return BB;
> -}
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
> - MachineBasicBlock *BB,
> - unsigned Size) const {
> - unsigned dest = MI->getOperand(0).getReg();
> - unsigned ptr = MI->getOperand(1).getReg();
> - unsigned oldval = MI->getOperand(2).getReg();
> - unsigned newval = MI->getOperand(3).getReg();
> - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> - DebugLoc dl = MI->getDebugLoc();
> -
> - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> - const TargetRegisterClass *TRCsp;
> - TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
> -
> - unsigned ldrOpc, strOpc;
> - getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
> -
> - MachineFunction *MF = BB->getParent();
> - const BasicBlock *LLVM_BB = BB->getBasicBlock();
> - MachineFunction::iterator It = BB;
> - ++It; // insert the new blocks after the current block
> -
> - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, loop1MBB);
> - MF->insert(It, loop2MBB);
> - MF->insert(It, exitMBB);
> -
> - // Transfer the remainder of BB and its successor edges to exitMBB.
> - exitMBB->splice(exitMBB->begin(), BB,
> - std::next(MachineBasicBlock::iterator(MI)), BB->end());
> - exitMBB->transferSuccessorsAndUpdatePHIs(BB);
> -
> - // thisMBB:
> - // ...
> - // fallthrough --> loop1MBB
> - BB->addSuccessor(loop1MBB);
> -
> - // loop1MBB:
> - // ldxr dest, [ptr]
> - // cmp dest, oldval
> - // b.ne exitMBB
> - BB = loop1MBB;
> - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
> -
> - unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
> - MRI.constrainRegClass(dest, TRCsp);
> - BuildMI(BB, dl, TII->get(CmpOp))
> - .addReg(dest).addReg(oldval).addImm(0);
> - BuildMI(BB, dl, TII->get(AArch64::Bcc))
> - .addImm(A64CC::NE).addMBB(exitMBB);
> - BB->addSuccessor(loop2MBB);
> - BB->addSuccessor(exitMBB);
> -
> - // loop2MBB:
> - // strex stxr_status, newval, [ptr]
> - // cbnz stxr_status, loop1MBB
> - BB = loop2MBB;
> - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
> - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
> -
> - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
> - BuildMI(BB, dl, TII->get(AArch64::CBNZw))
> - .addReg(stxr_status).addMBB(loop1MBB);
> - BB->addSuccessor(loop1MBB);
> - BB->addSuccessor(exitMBB);
> -
> - // exitMBB:
> - // ...
> - BB = exitMBB;
> -
> - MI->eraseFromParent(); // The instruction is gone now.
> -
> - return BB;
> -}
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
> - MachineBasicBlock *MBB) const {
> - // We materialise the F128CSEL pseudo-instruction using conditional branches
> - // and loads, giving an instruction sequence like:
> - // str q0, [sp]
> - // b.ne IfTrue
> - // b Finish
> - // IfTrue:
> - // str q1, [sp]
> - // Finish:
> - // ldr q0, [sp]
> - //
> - // Using virtual registers would probably not be beneficial since COPY
> - // instructions are expensive for f128 (there's no actual instruction to
> - // implement them).
> - //
> - // An alternative would be to do an integer-CSEL on some address. E.g.:
> - // mov x0, sp
> - // add x1, sp, #16
> - // str q0, [x0]
> - // str q1, [x1]
> - // csel x0, x0, x1, ne
> - // ldr q0, [x0]
> - //
> - // It's unclear which approach is actually optimal.
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> - MachineFunction *MF = MBB->getParent();
> - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> - DebugLoc DL = MI->getDebugLoc();
> - MachineFunction::iterator It = MBB;
> - ++It;
> -
> - unsigned DestReg = MI->getOperand(0).getReg();
> - unsigned IfTrueReg = MI->getOperand(1).getReg();
> - unsigned IfFalseReg = MI->getOperand(2).getReg();
> - unsigned CondCode = MI->getOperand(3).getImm();
> - bool NZCVKilled = MI->getOperand(4).isKill();
> -
> - MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, TrueBB);
> - MF->insert(It, EndBB);
> -
> - // Transfer rest of current basic-block to EndBB
> - EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
> - MBB->end());
> - EndBB->transferSuccessorsAndUpdatePHIs(MBB);
> -
> - // We need somewhere to store the f128 value.
> - int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
> -
> - // [... start of incoming MBB ...]
> - // str qIFFALSE, [sp]
> - // b.cc IfTrue
> - // b Done
> - BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
> - .addReg(IfFalseReg)
> - .addFrameIndex(ScratchFI)
> - .addImm(0);
> - BuildMI(MBB, DL, TII->get(AArch64::Bcc))
> - .addImm(CondCode)
> - .addMBB(TrueBB);
> - BuildMI(MBB, DL, TII->get(AArch64::Bimm))
> - .addMBB(EndBB);
> - MBB->addSuccessor(TrueBB);
> - MBB->addSuccessor(EndBB);
> -
> - if (!NZCVKilled) {
> - // NZCV is live-through TrueBB.
> - TrueBB->addLiveIn(AArch64::NZCV);
> - EndBB->addLiveIn(AArch64::NZCV);
> - }
> -
> - // IfTrue:
> - // str qIFTRUE, [sp]
> - BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
> - .addReg(IfTrueReg)
> - .addFrameIndex(ScratchFI)
> - .addImm(0);
> -
> - // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
> - // blocks.
> - TrueBB->addSuccessor(EndBB);
> -
> - // Done:
> - // ldr qDEST, [sp]
> - // [... rest of incoming MBB ...]
> - MachineInstr *StartOfEnd = EndBB->begin();
> - BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
> - .addFrameIndex(ScratchFI)
> - .addImm(0);
> -
> - MI->eraseFromParent();
> - return EndBB;
> -}
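
In C-like pseudocode the emitted sequence computes the following (a
sketch only: the real code uses an anonymous 16-byte spill slot rather
than a named scratch variable):

    // dest = cond ? iftrue : iffalse, for f128 values
    scratch = iffalse;        // str qIFFALSE, [sp, #off]
    if (cond)
      scratch = iftrue;       // str qIFTRUE, [sp, #off]
    dest = scratch;           // ldr qDEST, [sp, #off]
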
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
> - MachineBasicBlock *MBB) const {
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("Unhandled instruction with custom inserter");
> - case AArch64::F128CSEL:
> - return EmitF128CSEL(MI, MBB);
> - case AArch64::ATOMIC_LOAD_ADD_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
> - case AArch64::ATOMIC_LOAD_ADD_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
> - case AArch64::ATOMIC_LOAD_ADD_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
> - case AArch64::ATOMIC_LOAD_ADD_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_SUB_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
> - case AArch64::ATOMIC_LOAD_SUB_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
> - case AArch64::ATOMIC_LOAD_SUB_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
> - case AArch64::ATOMIC_LOAD_SUB_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_AND_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
> - case AArch64::ATOMIC_LOAD_AND_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
> - case AArch64::ATOMIC_LOAD_AND_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
> - case AArch64::ATOMIC_LOAD_AND_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_OR_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
> - case AArch64::ATOMIC_LOAD_OR_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
> - case AArch64::ATOMIC_LOAD_OR_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
> - case AArch64::ATOMIC_LOAD_OR_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_XOR_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
> - case AArch64::ATOMIC_LOAD_XOR_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
> - case AArch64::ATOMIC_LOAD_XOR_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
> - case AArch64::ATOMIC_LOAD_XOR_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_NAND_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
> - case AArch64::ATOMIC_LOAD_NAND_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
> - case AArch64::ATOMIC_LOAD_NAND_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
> - case AArch64::ATOMIC_LOAD_NAND_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_MIN_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
> - case AArch64::ATOMIC_LOAD_MIN_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
> - case AArch64::ATOMIC_LOAD_MIN_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
> - case AArch64::ATOMIC_LOAD_MIN_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
> -
> - case AArch64::ATOMIC_LOAD_MAX_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
> - case AArch64::ATOMIC_LOAD_MAX_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
> - case AArch64::ATOMIC_LOAD_MAX_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
> - case AArch64::ATOMIC_LOAD_MAX_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
> -
> - case AArch64::ATOMIC_LOAD_UMIN_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
> - case AArch64::ATOMIC_LOAD_UMIN_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
> - case AArch64::ATOMIC_LOAD_UMIN_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
> - case AArch64::ATOMIC_LOAD_UMIN_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
> -
> - case AArch64::ATOMIC_LOAD_UMAX_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
> - case AArch64::ATOMIC_LOAD_UMAX_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
> - case AArch64::ATOMIC_LOAD_UMAX_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
> - case AArch64::ATOMIC_LOAD_UMAX_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
> -
> - case AArch64::ATOMIC_SWAP_I8:
> - return emitAtomicBinary(MI, MBB, 1, 0);
> - case AArch64::ATOMIC_SWAP_I16:
> - return emitAtomicBinary(MI, MBB, 2, 0);
> - case AArch64::ATOMIC_SWAP_I32:
> - return emitAtomicBinary(MI, MBB, 4, 0);
> - case AArch64::ATOMIC_SWAP_I64:
> - return emitAtomicBinary(MI, MBB, 8, 0);
> -
> - case AArch64::ATOMIC_CMP_SWAP_I8:
> - return emitAtomicCmpSwap(MI, MBB, 1);
> - case AArch64::ATOMIC_CMP_SWAP_I16:
> - return emitAtomicCmpSwap(MI, MBB, 2);
> - case AArch64::ATOMIC_CMP_SWAP_I32:
> - return emitAtomicCmpSwap(MI, MBB, 4);
> - case AArch64::ATOMIC_CMP_SWAP_I64:
> - return emitAtomicCmpSwap(MI, MBB, 8);
> - }
> -}
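
As a concrete example of what feeds this switch, IR such as the
following (typed-pointer syntax of the period)

    %old = atomicrmw add i32* %p, i32 %v seq_cst

is selected to the ATOMIC_LOAD_ADD_I32 pseudo, which the hook above
hands to emitAtomicBinary for expansion into an ldxr/add/stxr retry
loop along the lines shown earlier.
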
> -
> -const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
> - switch (Opcode) {
> - case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
> - case AArch64ISD::Call: return "AArch64ISD::Call";
> - case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
> - case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
> - case AArch64ISD::BFI: return "AArch64ISD::BFI";
> - case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
> - case AArch64ISD::Ret: return "AArch64ISD::Ret";
> - case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
> - case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
> - case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
> - case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
> - case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
> - case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
> - case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
> - case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
> -
> - case AArch64ISD::NEON_MOVIMM:
> - return "AArch64ISD::NEON_MOVIMM";
> - case AArch64ISD::NEON_MVNIMM:
> - return "AArch64ISD::NEON_MVNIMM";
> - case AArch64ISD::NEON_FMOVIMM:
> - return "AArch64ISD::NEON_FMOVIMM";
> - case AArch64ISD::NEON_CMP:
> - return "AArch64ISD::NEON_CMP";
> - case AArch64ISD::NEON_CMPZ:
> - return "AArch64ISD::NEON_CMPZ";
> - case AArch64ISD::NEON_TST:
> - return "AArch64ISD::NEON_TST";
> - case AArch64ISD::NEON_QSHLs:
> - return "AArch64ISD::NEON_QSHLs";
> - case AArch64ISD::NEON_QSHLu:
> - return "AArch64ISD::NEON_QSHLu";
> - case AArch64ISD::NEON_VDUP:
> - return "AArch64ISD::NEON_VDUP";
> - case AArch64ISD::NEON_VDUPLANE:
> - return "AArch64ISD::NEON_VDUPLANE";
> - case AArch64ISD::NEON_REV16:
> - return "AArch64ISD::NEON_REV16";
> - case AArch64ISD::NEON_REV32:
> - return "AArch64ISD::NEON_REV32";
> - case AArch64ISD::NEON_REV64:
> - return "AArch64ISD::NEON_REV64";
> - case AArch64ISD::NEON_UZP1:
> - return "AArch64ISD::NEON_UZP1";
> - case AArch64ISD::NEON_UZP2:
> - return "AArch64ISD::NEON_UZP2";
> - case AArch64ISD::NEON_ZIP1:
> - return "AArch64ISD::NEON_ZIP1";
> - case AArch64ISD::NEON_ZIP2:
> - return "AArch64ISD::NEON_ZIP2";
> - case AArch64ISD::NEON_TRN1:
> - return "AArch64ISD::NEON_TRN1";
> - case AArch64ISD::NEON_TRN2:
> - return "AArch64ISD::NEON_TRN2";
> - case AArch64ISD::NEON_LD1_UPD:
> - return "AArch64ISD::NEON_LD1_UPD";
> - case AArch64ISD::NEON_LD2_UPD:
> - return "AArch64ISD::NEON_LD2_UPD";
> - case AArch64ISD::NEON_LD3_UPD:
> - return "AArch64ISD::NEON_LD3_UPD";
> - case AArch64ISD::NEON_LD4_UPD:
> - return "AArch64ISD::NEON_LD4_UPD";
> - case AArch64ISD::NEON_ST1_UPD:
> - return "AArch64ISD::NEON_ST1_UPD";
> - case AArch64ISD::NEON_ST2_UPD:
> - return "AArch64ISD::NEON_ST2_UPD";
> - case AArch64ISD::NEON_ST3_UPD:
> - return "AArch64ISD::NEON_ST3_UPD";
> - case AArch64ISD::NEON_ST4_UPD:
> - return "AArch64ISD::NEON_ST4_UPD";
> - case AArch64ISD::NEON_LD1x2_UPD:
> - return "AArch64ISD::NEON_LD1x2_UPD";
> - case AArch64ISD::NEON_LD1x3_UPD:
> - return "AArch64ISD::NEON_LD1x3_UPD";
> - case AArch64ISD::NEON_LD1x4_UPD:
> - return "AArch64ISD::NEON_LD1x4_UPD";
> - case AArch64ISD::NEON_ST1x2_UPD:
> - return "AArch64ISD::NEON_ST1x2_UPD";
> - case AArch64ISD::NEON_ST1x3_UPD:
> - return "AArch64ISD::NEON_ST1x3_UPD";
> - case AArch64ISD::NEON_ST1x4_UPD:
> - return "AArch64ISD::NEON_ST1x4_UPD";
> - case AArch64ISD::NEON_LD2DUP:
> - return "AArch64ISD::NEON_LD2DUP";
> - case AArch64ISD::NEON_LD3DUP:
> - return "AArch64ISD::NEON_LD3DUP";
> - case AArch64ISD::NEON_LD4DUP:
> - return "AArch64ISD::NEON_LD4DUP";
> - case AArch64ISD::NEON_LD2DUP_UPD:
> - return "AArch64ISD::NEON_LD2DUP_UPD";
> - case AArch64ISD::NEON_LD3DUP_UPD:
> - return "AArch64ISD::NEON_LD3DUP_UPD";
> - case AArch64ISD::NEON_LD4DUP_UPD:
> - return "AArch64ISD::NEON_LD4DUP_UPD";
> - case AArch64ISD::NEON_LD2LN_UPD:
> - return "AArch64ISD::NEON_LD2LN_UPD";
> - case AArch64ISD::NEON_LD3LN_UPD:
> - return "AArch64ISD::NEON_LD3LN_UPD";
> - case AArch64ISD::NEON_LD4LN_UPD:
> - return "AArch64ISD::NEON_LD4LN_UPD";
> - case AArch64ISD::NEON_ST2LN_UPD:
> - return "AArch64ISD::NEON_ST2LN_UPD";
> - case AArch64ISD::NEON_ST3LN_UPD:
> - return "AArch64ISD::NEON_ST3LN_UPD";
> - case AArch64ISD::NEON_ST4LN_UPD:
> - return "AArch64ISD::NEON_ST4LN_UPD";
> - case AArch64ISD::NEON_VEXTRACT:
> - return "AArch64ISD::NEON_VEXTRACT";
> - default:
> - return nullptr;
> - }
> -}
> -
> -static const MCPhysReg AArch64FPRArgRegs[] = {
> - AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
> - AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
> -};
> -static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
> -
> -static const MCPhysReg AArch64ArgRegs[] = {
> - AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
> - AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
> -};
> -static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
> -
> -static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
> - CCValAssign::LocInfo LocInfo,
> - ISD::ArgFlagsTy ArgFlags, CCState &State) {
> - // Mark all remaining general purpose registers as allocated. We don't
> - // backtrack: if (for example) an i128 gets put on the stack, no subsequent
> - // i64 will go in registers (C.11).
> - for (unsigned i = 0; i < NumArgRegs; ++i)
> - State.AllocateReg(AArch64ArgRegs[i]);
> -
> - return false;
> -}
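
A hypothetical signature showing rule C.11 in action:

    void f(long a, long b, long c, long d, long e, long g, long h,
           __int128 big,  // needs a register pair; only x7 left -> stack
           long tail);    // also stack, per C.11, though x7 is still free
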
> -
> -#include "AArch64GenCallingConv.inc"
> -
> -CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
> - switch (CC) {
> - default: llvm_unreachable("Unsupported calling convention");
> - case CallingConv::Fast:
> - case CallingConv::C:
> - return CC_A64_APCS;
> - }
> -}
> -
> -void
> -AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
> - SDLoc DL, SDValue &Chain) const {
> - MachineFunction &MF = DAG.getMachineFunction();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> -
> - SmallVector<SDValue, 8> MemOps;
> -
> - unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
> - NumArgRegs);
> - unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
> - NumFPRArgRegs);
> -
> - unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
> - int GPRIdx = 0;
> - if (GPRSaveSize != 0) {
> - GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
> -
> - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
> -
> - for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
> - unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
> - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
> - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
> - MachinePointerInfo::getStack(i * 8),
> - false, false, 0);
> - MemOps.push_back(Store);
> - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
> - DAG.getConstant(8, getPointerTy()));
> - }
> - }
> -
> - if (getSubtarget()->hasFPARMv8()) {
> - unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
> - int FPRIdx = 0;
> - // According to the AArch64 Procedure Call Standard, section B.1/B.3, we
> - // can omit a register save area if we know we'll never use registers of
> - // that class.
> - if (FPRSaveSize != 0) {
> - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
> -
> - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
> -
> - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
> - unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
> - &AArch64::FPR128RegClass);
> - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
> - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
> - MachinePointerInfo::getStack(i * 16),
> - false, false, 0);
> - MemOps.push_back(Store);
> - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
> - DAG.getConstant(16, getPointerTy()));
> - }
> - }
> - FuncInfo->setVariadicFPRIdx(FPRIdx);
> - FuncInfo->setVariadicFPRSize(FPRSaveSize);
> - }
> -
> - unsigned StackOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), 8);
> - int StackIdx = MFI->CreateFixedObject(8, StackOffset, true);
> -
> - FuncInfo->setVariadicStackIdx(StackIdx);
> - FuncInfo->setVariadicGPRIdx(GPRIdx);
> - FuncInfo->setVariadicGPRSize(GPRSaveSize);
> -
> - if (!MemOps.empty()) {
> - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
> - }
> -}
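
For a callee like this sketch, the code above dumps the as-yet
unallocated x-registers (and, when FP is available, q-registers) into
the save areas so that va_arg can walk them later:

    #include <stdarg.h>
    int sum(int n, ...) {     // n is in w0, so x1..x7 get saved
      va_list ap;
      va_start(ap, n);
      int total = 0;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, int);
      va_end(ap);
      return total;
    }
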
> -
> -SDValue
> -AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const {
> - MachineFunction &MF = DAG.getMachineFunction();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
> -
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> - CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
> -
> - SmallVector<SDValue, 16> ArgValues;
> -
> - SDValue ArgValue;
> - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
> - CCValAssign &VA = ArgLocs[i];
> - ISD::ArgFlagsTy Flags = Ins[i].Flags;
> -
> - if (Flags.isByVal()) {
> - // Byval is used for small structs and HFAs in the PCS, but the system
> - // should work in a non-compliant manner for larger structs.
> - EVT PtrTy = getPointerTy();
> - int Size = Flags.getByValSize();
> - unsigned NumRegs = (Size + 7) / 8;
> -
> - uint32_t BEAlign = 0;
> - if (Size < 8 && !getSubtarget()->isLittle())
> - BEAlign = 8 - Size;
> - unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
> - VA.getLocMemOffset() + BEAlign,
> - false);
> - SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
> - InVals.push_back(FrameIdxN);
> -
> - continue;
> - } else if (VA.isRegLoc()) {
> - MVT RegVT = VA.getLocVT();
> - const TargetRegisterClass *RC = getRegClassFor(RegVT);
> - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
> -
> - ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
> - } else { // !VA.isRegLoc()
> - assert(VA.isMemLoc());
> -
> - int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
> - VA.getLocMemOffset(), true);
> -
> - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
> - ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
> - MachinePointerInfo::getFixedStack(FI),
> - false, false, false, 0);
> - }
> -
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info!");
> - case CCValAssign::Full: break;
> - case CCValAssign::BCvt:
> - ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
> - break;
> - case CCValAssign::SExt:
> - case CCValAssign::ZExt:
> - case CCValAssign::AExt:
> - case CCValAssign::FPExt: {
> - unsigned DestSize = VA.getValVT().getSizeInBits();
> - unsigned DestSubReg;
> -
> - switch (DestSize) {
> - case 8: DestSubReg = AArch64::sub_8; break;
> - case 16: DestSubReg = AArch64::sub_16; break;
> - case 32: DestSubReg = AArch64::sub_32; break;
> - case 64: DestSubReg = AArch64::sub_64; break;
> - default: llvm_unreachable("Unexpected argument promotion");
> - }
> -
> - ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
> - VA.getValVT(), ArgValue,
> - DAG.getTargetConstant(DestSubReg, MVT::i32)),
> - 0);
> - break;
> - }
> - }
> -
> - InVals.push_back(ArgValue);
> - }
> -
> - if (isVarArg)
> - SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
> -
> - unsigned StackArgSize = CCInfo.getNextStackOffset();
> - if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
> - // This is a non-standard ABI so by fiat I say we're allowed to make full
> - // use of the stack area to be popped, which must be aligned to 16 bytes in
> - // any case:
> - StackArgSize = RoundUpToAlignment(StackArgSize, 16);
> -
> - // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
> - // a multiple of 16.
> - FuncInfo->setArgumentStackToRestore(StackArgSize);
> -
> - // This realignment carries over to the available bytes below. Our own
> - // callers will guarantee the space is free by giving an aligned value to
> - // CALLSEQ_START.
> - }
> - // Even if we're not expected to free up the space, it's useful to know how
> - // much is there while considering tail calls (because we can reuse it).
> - FuncInfo->setBytesInStackArgArea(StackArgSize);
> -
> - return Chain;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerReturn(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - SDLoc dl, SelectionDAG &DAG) const {
> - // CCValAssign - represent the assignment of the return value to a location.
> - SmallVector<CCValAssign, 16> RVLocs;
> -
> - // CCState - Info about the registers and stack slots.
> - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs, *DAG.getContext());
> -
> - // Analyze outgoing return values.
> - CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
> -
> - SDValue Flag;
> - SmallVector<SDValue, 4> RetOps(1, Chain);
> -
> - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
> - // PCS: "If the type, T, of the result of a function is such that
> - // void func(T arg) would require that arg be passed as a value in a
> - // register (or set of registers) according to the rules in 5.4, then the
> - // result is returned in the same registers as would be used for such an
> - // argument.
> - //
> - // Otherwise, the caller shall reserve a block of memory of sufficient
> - // size and alignment to hold the result. The address of the memory block
> - // shall be passed as an additional argument to the function in x8."
> - //
> - // This is implemented in two places. The register-return values are dealt
> - // with here; more complex returns are passed as an sret parameter, which
> - // means we don't have to worry about them during the actual return.
> - CCValAssign &VA = RVLocs[i];
> - assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
> -
> - SDValue Arg = OutVals[i];
> -
> - // There's no convenient note in the ABI about this as there is for normal
> - // arguments, but it says return values are passed in the same registers as
> - // an argument would be. I believe that includes the comments about
> - // unspecified higher bits, putting the burden of widening on the *caller*
> - // for return values.
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info");
> - case CCValAssign::Full: break;
> - case CCValAssign::SExt:
> - case CCValAssign::ZExt:
> - case CCValAssign::AExt:
> - // Floating-point values should only be extended when they're going into
> - // memory, which can't happen here so an integer extend is acceptable.
> - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
> - break;
> - case CCValAssign::BCvt:
> - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
> - break;
> - }
> -
> - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
> - Flag = Chain.getValue(1);
> - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
> - }
> -
> - RetOps[0] = Chain; // Update chain.
> -
> - // Add the flag if we have it.
> - if (Flag.getNode())
> - RetOps.push_back(Flag);
> -
> - return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, RetOps);
> -}
> -
> -unsigned AArch64TargetLowering::getByValTypeAlignment(Type *Ty) const {
> - // This is a new backend. For anything more precise than this a FE should
> - // set an explicit alignment.
> - return 4;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
> - SmallVectorImpl<SDValue> &InVals) const {
> - SelectionDAG &DAG = CLI.DAG;
> - SDLoc &dl = CLI.DL;
> - SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
> - SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
> - SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
> - SDValue Chain = CLI.Chain;
> - SDValue Callee = CLI.Callee;
> - bool &IsTailCall = CLI.IsTailCall;
> - CallingConv::ID CallConv = CLI.CallConv;
> - bool IsVarArg = CLI.IsVarArg;
> -
> - MachineFunction &MF = DAG.getMachineFunction();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
> - bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
> - bool IsSibCall = false;
> -
> - if (IsTailCall) {
> - IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
> - IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
> - Outs, OutVals, Ins, DAG);
> -
> - if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
> - report_fatal_error("failed to perform tail call elimination on a call "
> - "site marked musttail");
> -
> - // A sibling call is one where we're under the usual C ABI and not planning
> - // to change that but can still do a tail call:
> - if (!TailCallOpt && IsTailCall)
> - IsSibCall = true;
> - }
> -
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
> -
> - // On AArch64 (and all other architectures I'm aware of) the most this has to
> - // do is adjust the stack pointer.
> - unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
> - if (IsSibCall) {
> - // Since we're not changing the ABI to make this a tail call, the memory
> - // operands are already available in the caller's incoming argument space.
> - NumBytes = 0;
> - }
> -
> - // FPDiff is the byte offset of the call's argument area from the callee's.
> - // Stores to callee stack arguments will be placed in FixedStackSlots offset
> - // by this amount for a tail call. In a sibling call it must be 0 because the
> - // caller will deallocate the entire stack and the callee still expects its
> - // arguments to begin at SP+0. Completely unused for non-tail calls.
> - int FPDiff = 0;
> -
> - if (IsTailCall && !IsSibCall) {
> - unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
> -
> - // FPDiff will be negative if this tail call requires more space than we
> - // would automatically have in our incoming argument space. Positive if we
> - // can actually shrink the stack.
> - FPDiff = NumReusableBytes - NumBytes;
> -
> - // The stack pointer must be 16-byte aligned at all times it's used for a
> - // memory operation, which in practice means at *all* times and in
> - // particular across call boundaries. Therefore our own arguments started at
> - // a 16-byte aligned SP and the delta applied for the tail call should
> - // satisfy the same constraint.
> - assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
> - }
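
A worked example with made-up numbers: if our own incoming argument
area is 32 bytes and this tail call needs 48 bytes of outgoing
arguments, then FPDiff = 32 - 48 = -16, so the callee's arguments start
16 bytes below our own and the stack must grow by that amount before
the branch; a positive FPDiff would instead let it shrink.
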
> -
> - if (!IsSibCall)
> - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
> - dl);
> -
> - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
> - getPointerTy());
> -
> - SmallVector<SDValue, 8> MemOpChains;
> - SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
> -
> - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
> - CCValAssign &VA = ArgLocs[i];
> - ISD::ArgFlagsTy Flags = Outs[i].Flags;
> - SDValue Arg = OutVals[i];
> -
> - // Callee does the actual widening, so all extensions just use an implicit
> - // definition of the rest of the Loc. Aesthetically, this would be nicer as
> - // an ANY_EXTEND, but that isn't valid for floating-point types and this
> - // alternative works on integer types too.
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info!");
> - case CCValAssign::Full: break;
> - case CCValAssign::SExt:
> - case CCValAssign::ZExt:
> - case CCValAssign::AExt:
> - case CCValAssign::FPExt: {
> - unsigned SrcSize = VA.getValVT().getSizeInBits();
> - unsigned SrcSubReg;
> -
> - switch (SrcSize) {
> - case 8: SrcSubReg = AArch64::sub_8; break;
> - case 16: SrcSubReg = AArch64::sub_16; break;
> - case 32: SrcSubReg = AArch64::sub_32; break;
> - case 64: SrcSubReg = AArch64::sub_64; break;
> - default: llvm_unreachable("Unexpected argument promotion");
> - }
> -
> - Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
> - VA.getLocVT(),
> - DAG.getUNDEF(VA.getLocVT()),
> - Arg,
> - DAG.getTargetConstant(SrcSubReg, MVT::i32)),
> - 0);
> -
> - break;
> - }
> - case CCValAssign::BCvt:
> - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
> - break;
> - }
> -
> - if (VA.isRegLoc()) {
> - // A normal register (sub-) argument. For now we just note it down because
> - // we want to copy things into registers as late as possible to avoid
> - // register-pressure (and possibly worse).
> - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
> - continue;
> - }
> -
> - assert(VA.isMemLoc() && "unexpected argument location");
> -
> - SDValue DstAddr;
> - MachinePointerInfo DstInfo;
> - if (IsTailCall) {
> - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
> - VA.getLocVT().getSizeInBits();
> - OpSize = (OpSize + 7) / 8;
> - int32_t Offset = VA.getLocMemOffset() + FPDiff;
> - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
> -
> - DstAddr = DAG.getFrameIndex(FI, getPointerTy());
> - DstInfo = MachinePointerInfo::getFixedStack(FI);
> -
> - // Make sure any stack arguments overlapping with where we're storing are
> - // loaded before this eventual operation. Otherwise they'll be clobbered.
> - Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
> - } else {
> - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize()*8 :
> - VA.getLocVT().getSizeInBits();
> - OpSize = (OpSize + 7) / 8;
> - uint32_t BEAlign = 0;
> - if (OpSize < 8 && !getSubtarget()->isLittle())
> - BEAlign = 8 - OpSize;
> - SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + BEAlign);
> -
> - DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
> - DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
> - }
> -
> - if (Flags.isByVal()) {
> - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
> - SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
> - Flags.getByValAlign(),
> - /*isVolatile = */ false,
> - /*alwaysInline = */ false,
> - DstInfo, MachinePointerInfo());
> - MemOpChains.push_back(Cpy);
> - } else {
> - // Normal stack argument, put it where it's needed.
> - SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
> - false, false, 0);
> - MemOpChains.push_back(Store);
> - }
> - }
> -
> - // The loads and stores generated above shouldn't clash with each
> - // other. Combining them with this TokenFactor notes that fact for the rest of
> - // the backend.
> - if (!MemOpChains.empty())
> - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
> -
> - // Most of the rest of the instructions need to be glued together; we don't
> - // want assignments to actual registers used by a call to be rearranged by a
> - // well-meaning scheduler.
> - SDValue InFlag;
> -
> - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
> - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
> - RegsToPass[i].second, InFlag);
> - InFlag = Chain.getValue(1);
> - }
> -
> - // The linker is responsible for inserting veneers when necessary to put a
> - // function call destination in range, so we don't need to bother with a
> - // wrapper here.
> - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
> - const GlobalValue *GV = G->getGlobal();
> - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
> - } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
> - const char *Sym = S->getSymbol();
> - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
> - }
> -
> - // We don't usually want to end the call-sequence here because we would tidy
> - // the frame up *after* the call, however in the ABI-changing tail-call case
> - // we've carefully laid out the parameters so that when sp is reset they'll be
> - // in the correct location.
> - if (IsTailCall && !IsSibCall) {
> - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
> - DAG.getIntPtrConstant(0, true), InFlag, dl);
> - InFlag = Chain.getValue(1);
> - }
> -
> - // We produce the following DAG scheme for the actual call instruction:
> - // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
> - //
> - // Most arguments aren't going to be used and just keep the values live as
> - // far as LLVM is concerned. It's expected to be selected as simply "bl
> - // callee" (for a direct, non-tail call).
> - std::vector<SDValue> Ops;
> - Ops.push_back(Chain);
> - Ops.push_back(Callee);
> -
> - if (IsTailCall) {
> - // Each tail call may have to adjust the stack by a different amount, so
> - // this information must travel along with the operation for eventual
> - // consumption by emitEpilogue.
> - Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
> - }
> -
> - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
> - Ops.push_back(DAG.getRegister(RegsToPass[i].first,
> - RegsToPass[i].second.getValueType()));
> -
> - // Add a register mask operand representing the call-preserved registers. This
> - // is used later in codegen to constrain register-allocation.
> - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
> - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
> - assert(Mask && "Missing call preserved mask for calling convention");
> - Ops.push_back(DAG.getRegisterMask(Mask));
> -
> - // If we needed glue, put it in as the last argument.
> - if (InFlag.getNode())
> - Ops.push_back(InFlag);
> -
> - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
> -
> - if (IsTailCall) {
> - return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, Ops);
> - }
> -
> - Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, Ops);
> - InFlag = Chain.getValue(1);
> -
> - // Now we can reclaim the stack; we may as well do it before working out
> - // where our return value is.
> - if (!IsSibCall) {
> - uint64_t CalleePopBytes
> - = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
> -
> - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
> - DAG.getIntPtrConstant(CalleePopBytes, true),
> - InFlag, dl);
> - InFlag = Chain.getValue(1);
> - }
> -
> - return LowerCallResult(Chain, InFlag, CallConv,
> - IsVarArg, Ins, dl, DAG, InVals);
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
> - CallingConv::ID CallConv, bool IsVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const {
> - // Assign locations to each value returned by this call.
> - SmallVector<CCValAssign, 16> RVLocs;
> - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs, *DAG.getContext());
> - CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
> -
> - for (unsigned i = 0; i != RVLocs.size(); ++i) {
> - CCValAssign VA = RVLocs[i];
> -
> - // Return values that are too big to fit into registers should use an sret
> - // pointer, so this can be a lot simpler than the main argument code.
> - assert(VA.isRegLoc() && "Memory locations not expected for call return");
> -
> - SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
> - InFlag);
> - Chain = Val.getValue(1);
> - InFlag = Val.getValue(2);
> -
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info!");
> - case CCValAssign::Full: break;
> - case CCValAssign::BCvt:
> - Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
> - break;
> - case CCValAssign::ZExt:
> - case CCValAssign::SExt:
> - case CCValAssign::AExt:
> - // Floating-point arguments only get extended/truncated if they're going
> - // in memory, so using the integer operation is acceptable here.
> - Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
> - break;
> - }
> -
> - InVals.push_back(Val);
> - }
> -
> - return Chain;
> -}
> -
> -bool
> -AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
> - CallingConv::ID CalleeCC,
> - bool IsVarArg,
> - bool IsCalleeStructRet,
> - bool IsCallerStructRet,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SelectionDAG& DAG) const {
> -
> - // For CallingConv::C this function knows whether the ABI needs
> - // changing. That's not true for other conventions so they will have to opt in
> - // manually.
> - if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
> - return false;
> -
> - const MachineFunction &MF = DAG.getMachineFunction();
> - const Function *CallerF = MF.getFunction();
> - CallingConv::ID CallerCC = CallerF->getCallingConv();
> - bool CCMatch = CallerCC == CalleeCC;
> -
> - // Byval parameters hand the function a pointer directly into the stack area
> - // we want to reuse during a tail call. Working around this *is* possible (see
> - // X86) but less efficient and uglier in LowerCall.
> - for (Function::const_arg_iterator i = CallerF->arg_begin(),
> - e = CallerF->arg_end(); i != e; ++i)
> - if (i->hasByValAttr())
> - return false;
> -
> - if (getTargetMachine().Options.GuaranteedTailCallOpt) {
> - if (IsTailCallConvention(CalleeCC) && CCMatch)
> - return true;
> - return false;
> - }
> -
> - // Now we search for cases where we can use a tail call without changing the
> - // ABI. Sibcall is used in some places (particularly gcc) to refer to this
> - // concept.
> -
> - // I want anyone implementing a new calling convention to think long and hard
> - // about this assert.
> - assert((!IsVarArg || CalleeCC == CallingConv::C)
> - && "Unexpected variadic calling convention");
> -
> - if (IsVarArg && !Outs.empty()) {
> - // At least two cases here: if caller is fastcc then we can't have any
> - // memory arguments (we'd be expected to clean up the stack afterwards). If
> - // caller is C then we could potentially use its argument area.
> -
> - // FIXME: for now we take the most conservative of these in both cases:
> - // disallow all variadic memory operands.
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> -
> - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
> - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
> - if (!ArgLocs[i].isRegLoc())
> - return false;
> - }
> -
> - // If the calling conventions do not match, then we'd better make sure the
> - // results are returned in the same way as what the caller expects.
> - if (!CCMatch) {
> - SmallVector<CCValAssign, 16> RVLocs1;
> - CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs1, *DAG.getContext());
> - CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
> -
> - SmallVector<CCValAssign, 16> RVLocs2;
> - CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs2, *DAG.getContext());
> - CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
> -
> - if (RVLocs1.size() != RVLocs2.size())
> - return false;
> - for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
> - if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
> - return false;
> - if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
> - return false;
> - if (RVLocs1[i].isRegLoc()) {
> - if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
> - return false;
> - } else {
> - if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
> - return false;
> - }
> - }
> - }
> -
> - // Nothing more to check if the callee is taking no arguments
> - if (Outs.empty())
> - return true;
> -
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> -
> - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
> -
> - const AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> -
> - // If the stack arguments for this call would fit into our own save area then
> - // the call can be made tail.
> - return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
> -}
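
A minimal case (hypothetical functions) that passes all of the checks
above under CallingConv::C, since no argument needs the stack and the
optimiser can mark the call as tail:

    long callee(long);
    long caller(long x) {
      return callee(x + 1);   // can be emitted as "b callee"
    }
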
> -
> -bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
> - bool TailCallOpt) const {
> - return CallCC == CallingConv::Fast && TailCallOpt;
> -}
> -
> -bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
> - return CallCC == CallingConv::Fast;
> -}
> -
> -SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
> - SelectionDAG &DAG,
> - MachineFrameInfo *MFI,
> - int ClobberedFI) const {
> - SmallVector<SDValue, 8> ArgChains;
> - int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
> - int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
> -
> - // Include the original chain at the beginning of the list. When this is
> - // used by target LowerCall hooks, this helps legalize find the
> - // CALLSEQ_BEGIN node.
> - ArgChains.push_back(Chain);
> -
> - // Add a chain value for each stack-argument load that overlaps the object
> - // being clobbered.
> - for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
> - UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
> - if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
> - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
> - if (FI->getIndex() < 0) {
> - int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
> - int64_t InLastByte = InFirstByte;
> - InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
> -
> - if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
> - (FirstByte <= InFirstByte && InFirstByte <= LastByte))
> - ArgChains.push_back(SDValue(L, 1));
> - }
> -
> - // Build a tokenfactor for all the chains.
> - return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
> -}
> -
> -static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
> - switch (CC) {
> - case ISD::SETEQ: return A64CC::EQ;
> - case ISD::SETGT: return A64CC::GT;
> - case ISD::SETGE: return A64CC::GE;
> - case ISD::SETLT: return A64CC::LT;
> - case ISD::SETLE: return A64CC::LE;
> - case ISD::SETNE: return A64CC::NE;
> - case ISD::SETUGT: return A64CC::HI;
> - case ISD::SETUGE: return A64CC::HS;
> - case ISD::SETULT: return A64CC::LO;
> - case ISD::SETULE: return A64CC::LS;
> - default: llvm_unreachable("Unexpected condition code");
> - }
> -}
> -
> -bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
> - // icmp is implemented using adds/subs immediate, which take an unsigned
> - // 12-bit immediate, optionally shifted left by 12 bits.
> -
> - // The range is symmetric, since we can negate and use SUBS instead of ADDS.
> - if (Val < 0)
> - Val = -Val;
> -
> - return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
> -}
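
A few illustrative values for this predicate:

    isLegalICmpImmediate(4095);   // true:  0xfff fits the 12-bit field
    isLegalICmpImmediate(4096);   // true:  0x1000 encodes with LSL #12
    isLegalICmpImmediate(4097);   // false: 0x1001 straddles both halves
    isLegalICmpImmediate(-42);    // true:  negated and handled with ADDS
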
> -
> -SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
> - ISD::CondCode CC, SDValue &A64cc,
> - SelectionDAG &DAG, SDLoc &dl) const {
> - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
> - int64_t C = 0;
> - EVT VT = RHSC->getValueType(0);
> - bool knownInvalid = false;
> -
> - // I'm not convinced the rest of LLVM handles these edge cases properly, but
> - // we can at least get it right.
> - if (isSignedIntSetCC(CC)) {
> - C = RHSC->getSExtValue();
> - } else if (RHSC->getZExtValue() > INT64_MAX) {
> - // A 64-bit constant not representable by a signed 64-bit integer is far
> - // too big to fit into a SUBS immediate anyway.
> - knownInvalid = true;
> - } else {
> - C = RHSC->getZExtValue();
> - }
> -
> - if (!knownInvalid && !isLegalICmpImmediate(C)) {
> - // Constant does not fit, try adjusting it by one?
> - switch (CC) {
> - default: break;
> - case ISD::SETLT:
> - case ISD::SETGE:
> - if (isLegalICmpImmediate(C-1)) {
> - CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
> - RHS = DAG.getConstant(C-1, VT);
> - }
> - break;
> - case ISD::SETULT:
> - case ISD::SETUGE:
> - if (isLegalICmpImmediate(C-1)) {
> - CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
> - RHS = DAG.getConstant(C-1, VT);
> - }
> - break;
> - case ISD::SETLE:
> - case ISD::SETGT:
> - if (isLegalICmpImmediate(C+1)) {
> - CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
> - RHS = DAG.getConstant(C+1, VT);
> - }
> - break;
> - case ISD::SETULE:
> - case ISD::SETUGT:
> - if (isLegalICmpImmediate(C+1)) {
> - CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
> - RHS = DAG.getConstant(C+1, VT);
> - }
> - break;
> - }
> - }
> - }
> -
> - A64CC::CondCodes CondCode = IntCCToA64CC(CC);
> - A64cc = DAG.getConstant(CondCode, MVT::i32);
> - return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> -}
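
For instance, with an i64 "x < 4097" the RHS doesn't encode, but 4096
does, so the SETLT case above rewrites the test to the equivalent
"x <= 4096" and the comparison stays a single subtract-immediate.
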
> -
> -static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
> - A64CC::CondCodes &Alternative) {
> - A64CC::CondCodes CondCode = A64CC::Invalid;
> - Alternative = A64CC::Invalid;
> -
> - switch (CC) {
> - default: llvm_unreachable("Unknown FP condition!");
> - case ISD::SETEQ:
> - case ISD::SETOEQ: CondCode = A64CC::EQ; break;
> - case ISD::SETGT:
> - case ISD::SETOGT: CondCode = A64CC::GT; break;
> - case ISD::SETGE:
> - case ISD::SETOGE: CondCode = A64CC::GE; break;
> - case ISD::SETOLT: CondCode = A64CC::MI; break;
> - case ISD::SETOLE: CondCode = A64CC::LS; break;
> - case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
> - case ISD::SETO: CondCode = A64CC::VC; break;
> - case ISD::SETUO: CondCode = A64CC::VS; break;
> - case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
> - case ISD::SETUGT: CondCode = A64CC::HI; break;
> - case ISD::SETUGE: CondCode = A64CC::PL; break;
> - case ISD::SETLT:
> - case ISD::SETULT: CondCode = A64CC::LT; break;
> - case ISD::SETLE:
> - case ISD::SETULE: CondCode = A64CC::LE; break;
> - case ISD::SETNE:
> - case ISD::SETUNE: CondCode = A64CC::NE; break;
> - }
> - return CondCode;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc DL(Op);
> - EVT PtrVT = getPointerTy();
> - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
> -
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - // The most efficient code is PC-relative anyway for the small memory model,
> - // so we don't need to worry about relocation model.
> - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - DAG.getTargetBlockAddress(BA, PtrVT, 0,
> - AArch64II::MO_NO_FLAG),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0,
> - AArch64II::MO_LO12),
> - DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
> - case CodeModel::Large:
> - return DAG.getNode(
> - AArch64ISD::WrapperLarge, DL, PtrVT,
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -// (BRCOND chain, val, dest)
> -SDValue
> -AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue Chain = Op.getOperand(0);
> - SDValue TheBit = Op.getOperand(1);
> - SDValue DestBB = Op.getOperand(2);
> -
> - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
> - // that as the consumer we are responsible for ignoring rubbish in higher
> - // bits.
> - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
> - DAG.getConstant(1, MVT::i32));
> -
> - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
> - DAG.getConstant(0, TheBit.getValueType()),
> - DAG.getCondCode(ISD::SETNE));
> -
> - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
> - A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
> - DestBB);
> -}
> -
> -// (BR_CC chain, condcode, lhs, rhs, dest)
> -SDValue
> -AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue Chain = Op.getOperand(0);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
> - SDValue LHS = Op.getOperand(2);
> - SDValue RHS = Op.getOperand(3);
> - SDValue DestBB = Op.getOperand(4);
> -
> - if (LHS.getValueType() == MVT::f128) {
> - // f128 comparisons are lowered to runtime calls by a routine which sets
> - // LHS, RHS and CC appropriately for the rest of this function to continue.
> - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
> -
> - // If softenSetCCOperands returned a scalar, we need to compare the result
> - // against zero to select between true and false values.
> - if (!RHS.getNode()) {
> - RHS = DAG.getConstant(0, LHS.getValueType());
> - CC = ISD::SETNE;
> - }
> - }
> -
> - if (LHS.getValueType().isInteger()) {
> - SDValue A64cc;
> -
> - // Integers are handled in a separate function because the combinations of
> - // immediates and tests can get hairy and we may want to fiddle things.
> - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
> -
> - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
> - Chain, CmpOp, A64cc, DestBB);
> - }
> -
> - // Note that some LLVM floating-point CondCodes can't be lowered to a single
> - // conditional branch, hence FPCCToA64CC can set a second test, where either
> - // passing is sufficient.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
> - Chain, SetCC, A64cc, DestBB);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
> - A64BR_CC, SetCC, A64cc, DestBB);
> -
> - }
> -
> - return A64BR_CC;
> -}
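
For a condition that needs the Alternative, e.g. SETONE (ordered and
not equal), the result is two conditional branches off one compare,
roughly:

    fcmp  d0, d1
    b.mi  dest        // taken when LHS < RHS
    b.gt  dest        // taken when LHS > RHS
                      // falls through when equal or unordered
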
> -
> -SDValue
> -AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
> - RTLIB::Libcall Call) const {
> - ArgListTy Args;
> - ArgListEntry Entry;
> - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
> - EVT ArgVT = Op.getOperand(i).getValueType();
> - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
> - Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
> - Entry.isSExt = false;
> - Entry.isZExt = false;
> - Args.push_back(Entry);
> - }
> - SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
> -
> - Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
> -
> - // By default, the input chain to this libcall is the entry node of the
> - // function. If the libcall is going to be emitted as a tail call then
> - // isUsedByReturnOnly will change it to the right chain if the return
> - // node which is being folded has a non-entry input chain.
> - SDValue InChain = DAG.getEntryNode();
> -
> - // isTailCall may be true since the callee does not reference caller stack
> - // frame. Check if it's in the right position.
> - SDValue TCChain = InChain;
> - bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
> - if (isTailCall)
> - InChain = TCChain;
> -
> - TargetLowering::CallLoweringInfo CLI(DAG);
> - CLI.setDebugLoc(SDLoc(Op)).setChain(InChain)
> - .setCallee(getLibcallCallingConv(Call), RetTy, Callee, &Args, 0)
> - .setTailCall(isTailCall);
> -
> - std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
> -
> - if (!CallInfo.second.getNode())
> - // It's a tailcall, return the chain (which is the DAG root).
> - return DAG.getRoot();
> -
> - return CallInfo.first;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
> - if (Op.getOperand(0).getValueType() != MVT::f128) {
> - // It's legal except when f128 is involved
> - return Op;
> - }
> -
> - RTLIB::Libcall LC;
> - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - SDValue SrcVal = Op.getOperand(0);
> - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
> - /*isSigned*/ false, SDLoc(Op)).first;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
> - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
> -
> - RTLIB::Libcall LC;
> - LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - return LowerF128ToCall(Op, DAG, LC);
> -}
> -
> -static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) {
> - SDLoc dl(Op);
> - EVT VT = Op.getValueType();
> - SDValue Vec = Op.getOperand(0);
> - EVT OpVT = Vec.getValueType();
> - unsigned Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
> -
> - if (VT.getVectorNumElements() == 1) {
> - assert(OpVT == MVT::v1f64 && "Unexpected vector type!");
> - if (VT.getSizeInBits() == OpVT.getSizeInBits())
> - return Op;
> - return DAG.UnrollVectorOp(Op.getNode());
> - }
> -
> - if (VT.getSizeInBits() > OpVT.getSizeInBits()) {
> - assert(Vec.getValueType() == MVT::v2f32 && VT == MVT::v2i64 &&
> - "Unexpected vector type!");
> - Vec = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Vec);
> - return DAG.getNode(Opc, dl, VT, Vec);
> - } else if (VT.getSizeInBits() < OpVT.getSizeInBits()) {
> - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
> - OpVT.getVectorElementType().getSizeInBits());
> - CastVT =
> - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
> - Vec = DAG.getNode(Opc, dl, CastVT, Vec);
> - return DAG.getNode(ISD::TRUNCATE, dl, VT, Vec);
> - }
> - return DAG.getNode(Opc, dl, VT, Vec);
> -}
> -
> -static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
> - // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
> - // same operand and of type v1* using the DUP instruction.
> - unsigned NumOps = Op->getNumOperands();
> - if (NumOps == 2) {
> - assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
> - return Op;
> - }
> -
> - if (NumOps != 4 && NumOps != 8 && NumOps != 16)
> - return SDValue();
> -
> - // Must be a single value for VDUP.
> - SDValue Op0 = Op.getOperand(0);
> - for (unsigned i = 1; i < NumOps; ++i) {
> - SDValue OpN = Op.getOperand(i);
> - if (Op0 != OpN)
> - return SDValue();
> - }
> -
> - // Verify the value type.
> - EVT EltVT = Op0.getValueType();
> - switch (NumOps) {
> - default: llvm_unreachable("Unexpected number of operands");
> - case 4:
> - if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
> - return SDValue();
> - break;
> - case 8:
> - if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
> - return SDValue();
> - break;
> - case 16:
> - if (EltVT != MVT::v1i8)
> - return SDValue();
> - break;
> - }
> -
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> - // VDUP produces better code for constants.
> - if (Op0->getOpcode() == ISD::BUILD_VECTOR)
> - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
> - DAG.getConstant(0, MVT::i64));
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) const {
> - if (Op.getValueType().isVector())
> - return LowerVectorFP_TO_INT(Op, DAG, IsSigned);
> - if (Op.getOperand(0).getValueType() != MVT::f128) {
> - // It's legal except when f128 is involved
> - return Op;
> - }
> -
> - RTLIB::Libcall LC;
> - if (IsSigned)
> - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
> - else
> - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - return LowerF128ToCall(Op, DAG, LC);
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
> - MachineFunction &MF = DAG.getMachineFunction();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - MFI->setReturnAddressIsTaken(true);
> -
> - if (verifyReturnAddressArgumentIsConstant(Op, DAG))
> - return SDValue();
> -
> - EVT VT = Op.getValueType();
> - SDLoc dl(Op);
> - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> - if (Depth) {
> - SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
> - SDValue Offset = DAG.getConstant(8, MVT::i64);
> - return DAG.getLoad(VT, dl, DAG.getEntryNode(),
> - DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
> - MachinePointerInfo(), false, false, false, 0);
> - }
> -
> - // Return X30, which contains the return address. Mark it an implicit live-in.
> - unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64));
> - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64);
> -}
> -
> -SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG)
> - const {
> - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
> - MFI->setFrameAddressIsTaken(true);
> -
> - EVT VT = Op.getValueType();
> - SDLoc dl(Op);
> - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> - unsigned FrameReg = AArch64::X29;
> - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
> - while (Depth--)
> - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
> - MachinePointerInfo(),
> - false, false, false, 0);
> - return FrameAddr;
> -}
> -
> -// FIXME? Maybe this could be a TableGen attribute on some registers and
> -// this table could be generated automatically from RegInfo.
> -unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
> - EVT VT) const {
> - unsigned Reg = StringSwitch<unsigned>(RegName)
> - .Case("sp", AArch64::XSP)
> - .Default(0);
> - if (Reg)
> - return Reg;
> - report_fatal_error("Invalid register name for named-register global variable");
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(getTargetMachine().getCodeModel() == CodeModel::Large);
> - assert(getTargetMachine().getRelocationModel() == Reloc::Static);
> -
> - EVT PtrVT = getPointerTy();
> - SDLoc dl(Op);
> - const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
> - const GlobalValue *GV = GN->getGlobal();
> -
> - SDValue GlobalAddr = DAG.getNode(
> - AArch64ISD::WrapperLarge, dl, PtrVT,
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
> -
> - if (GN->getOffset() != 0)
> - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
> - DAG.getConstant(GN->getOffset(), PtrVT));
> -
> - return GlobalAddr;
> -}
> -
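The four MO_ABS_G3..G0_NC operands correspond to the usual large-model MOVZ/MOVK expansion, which rebuilds a 64-bit absolute address 16 bits at a time. A sketch of the arithmetic the relocations encode, assuming that expansion:

    #include <cstdint>

    uint64_t materializeLargeModel(uint16_t G3, uint16_t G2, uint16_t G1,
                                   uint16_t G0) {
      uint64_t Addr = (uint64_t)G3 << 48; // MOVZ xN, #:abs_g3:sym
      Addr |= (uint64_t)G2 << 32;         // MOVK xN, #:abs_g2_nc:sym
      Addr |= (uint64_t)G1 << 16;         // MOVK xN, #:abs_g1_nc:sym
      Addr |= (uint64_t)G0;               // MOVK xN, #:abs_g0_nc:sym
      return Addr;
    }
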
> -SDValue
> -AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(getTargetMachine().getCodeModel() == CodeModel::Small);
> -
> - EVT PtrVT = getPointerTy();
> - SDLoc dl(Op);
> - const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
> - const GlobalValue *GV = GN->getGlobal();
> - unsigned Alignment = GV->getAlignment();
> - Reloc::Model RelocM = getTargetMachine().getRelocationModel();
> - if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
> - // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
> - // to zero when they remain undefined. In PIC mode the GOT can take care of
> - // this, but in absolute mode we use a constant pool load.
> - SDValue PoolAddr;
> - PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
> - DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
> - AArch64II::MO_NO_FLAG),
> - DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
> - AArch64II::MO_LO12),
> - DAG.getConstant(8, MVT::i32));
> - SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
> - MachinePointerInfo::getConstantPool(),
> - /*isVolatile=*/ false,
> - /*isNonTemporal=*/ true,
> - /*isInvariant=*/ true, 8);
> - if (GN->getOffset() != 0)
> - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
> - DAG.getConstant(GN->getOffset(), PtrVT));
> -
> - return GlobalAddr;
> - }
> -
> - if (Alignment == 0) {
> - const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
> - if (GVPtrTy->getElementType()->isSized()) {
> - Alignment
> - = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
> - } else {
> - // Be conservative if we can't guess, not that it really matters:
> - // functions and labels aren't valid for loads, and the methods used to
> - // actually calculate an address work with any alignment.
> - Alignment = 1;
> - }
> - }
> -
> - unsigned char HiFixup, LoFixup;
> - bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
> -
> - if (UseGOT) {
> - HiFixup = AArch64II::MO_GOT;
> - LoFixup = AArch64II::MO_GOT_LO12;
> - Alignment = 8;
> - } else {
> - HiFixup = AArch64II::MO_NO_FLAG;
> - LoFixup = AArch64II::MO_LO12;
> - }
> -
> - // AArch64's small model demands the following sequence:
> - // ADRP x0, somewhere
> - // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
> - SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
> - HiFixup),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
> - LoFixup),
> - DAG.getConstant(Alignment, MVT::i32));
> -
> - if (UseGOT) {
> - GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
> - GlobalRef);
> - }
> -
> - if (GN->getOffset() != 0)
> - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
> - DAG.getConstant(GN->getOffset(), PtrVT));
> -
> - return GlobalRef;
> -}
> -
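The ADRP/ADD sequence mentioned in the comment above splits an address into a 4KiB page and a 12-bit offset within it. A sketch of the split; the PC-relative part of ADRP is resolved by the relocation:

    #include <cstdint>

    uint64_t smallModelAddress(uint64_t Sym) {
      uint64_t Page = Sym & ~0xfffULL; // what ADRP materializes
      uint64_t Lo12 = Sym & 0xfffULL;  // what the :lo12: ADD (or LDR) supplies
      return Page + Lo12;
    }
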
> -SDValue
> -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
> - SelectionDAG &DAG) const {
> - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
> - // we make those distinctions here.
> -
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - return LowerGlobalAddressELFSmall(Op, DAG);
> - case CodeModel::Large:
> - return LowerGlobalAddressELFLarge(Op, DAG);
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerConstantPool(SDValue Op,
> - SelectionDAG &DAG) const {
> - SDLoc DL(Op);
> - EVT PtrVT = getPointerTy();
> - ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Op);
> - const Constant *C = CN->getConstVal();
> -
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - // The most efficient code is PC-relative anyway for the small memory model,
> - // so we don't need to worry about relocation model.
> - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0,
> - AArch64II::MO_NO_FLAG),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0,
> - AArch64II::MO_LO12),
> - DAG.getConstant(CN->getAlignment(), MVT::i32));
> - case CodeModel::Large:
> - return DAG.getNode(
> - AArch64ISD::WrapperLarge, DL, PtrVT,
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC));
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
> - SDValue DescAddr,
> - SDLoc DL,
> - SelectionDAG &DAG) const {
> - EVT PtrVT = getPointerTy();
> -
> - // The function we need to call is simply the first entry in the GOT for this
> - // descriptor, load it in preparation.
> - SDValue Func, Chain;
> - Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
> - DescAddr);
> -
> - // The function takes only one argument: the address of the descriptor itself
> - // in X0.
> - SDValue Glue;
> - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
> - Glue = Chain.getValue(1);
> -
> - // Finally, there's a special calling convention which means that the lookup
> - // must preserve all registers (except X0, obviously).
> - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
> - const AArch64RegisterInfo *A64RI
> - = static_cast<const AArch64RegisterInfo *>(TRI);
> - const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
> -
> - // We're now ready to populate the argument list, as with a normal call:
> - std::vector<SDValue> Ops;
> - Ops.push_back(Chain);
> - Ops.push_back(Func);
> - Ops.push_back(SymAddr);
> - Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
> - Ops.push_back(DAG.getRegisterMask(Mask));
> - Ops.push_back(Glue);
> -
> - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
> - Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, Ops);
> - Glue = Chain.getValue(1);
> -
> - // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
> - // back to the generic handling code.
> - return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
> -}
> -
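A rough C++ picture of the convention this code implements, with hypothetical field names: the descriptor's first GOT entry is a resolver function, the call passes the descriptor's own address in X0, and the result is the symbol's offset from TPIDR_EL0.

    #include <cstdint>

    struct TLSDescriptor {
      uint64_t (*Resolver)(const TLSDescriptor *); // first GOT entry; the
                                                   // GOTLoad above fetches it
      uint64_t Arg;                                // private to the linker
    };

    // The TLSDESCCALL node amounts to this call, with the extra constraint
    // that the resolver preserves every register except X0.
    uint64_t tpOffset(const TLSDescriptor *Desc) {
      return Desc->Resolver(Desc);
    }
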
> -SDValue
> -AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(getSubtarget()->isTargetELF() &&
> - "TLS not implemented for non-ELF targets");
> - assert(getTargetMachine().getCodeModel() == CodeModel::Small
> - && "TLS only supported in small memory model");
> - const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
> -
> - TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
> -
> - SDValue TPOff;
> - EVT PtrVT = getPointerTy();
> - SDLoc DL(Op);
> - const GlobalValue *GV = GA->getGlobal();
> -
> - SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
> -
> - if (Model == TLSModel::InitialExec) {
> - TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_GOTTPREL),
> - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_GOTTPREL_LO12),
> - DAG.getConstant(8, MVT::i32));
> - TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
> - TPOff);
> - } else if (Model == TLSModel::LocalExec) {
> - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_TPREL_G1);
> - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_TPREL_G0_NC);
> -
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
> - DAG.getTargetConstant(1, MVT::i32)), 0);
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
> - TPOff, LoVar,
> - DAG.getTargetConstant(0, MVT::i32)), 0);
> - } else if (Model == TLSModel::GeneralDynamic) {
> - // Accesses used in this sequence go via the TLS descriptor which lives in
> - // the GOT. Prepare an address we can use to handle this.
> - SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_TLSDESC);
> - SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_TLSDESC_LO12);
> - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - HiDesc, LoDesc,
> - DAG.getConstant(8, MVT::i32));
> - SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
> -
> - TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
> - } else if (Model == TLSModel::LocalDynamic) {
> - // Local-dynamic accesses proceed in two phases. First, a general-dynamic
> - // TLS descriptor call against the special symbol _TLS_MODULE_BASE_
> - // calculates the beginning of the module's TLS region; a DTPREL offset
> - // calculation then locates the variable within that region.
> -
> - // These accesses will need deduplicating if there's more than one.
> - AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
> - .getInfo<AArch64MachineFunctionInfo>();
> - MFI->incNumLocalDynamicTLSAccesses();
> -
> - // Get the location of _TLS_MODULE_BASE_:
> - SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
> - AArch64II::MO_TLSDESC);
> - SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
> - AArch64II::MO_TLSDESC_LO12);
> - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - HiDesc, LoDesc,
> - DAG.getConstant(8, MVT::i32));
> - SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
> -
> - ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
> -
> - // Get the variable's offset from _TLS_MODULE_BASE_
> - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_DTPREL_G1);
> - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_DTPREL_G0_NC);
> -
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
> - DAG.getTargetConstant(0, MVT::i32)), 0);
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
> - TPOff, LoVar,
> - DAG.getTargetConstant(0, MVT::i32)), 0);
> - } else
> - llvm_unreachable("Unsupported TLS access model");
> -
> - return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
> -}
> -
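In the local-exec path above, the MOVZxii/MOVKxii pair just materializes a link-time-constant thread-pointer offset 16 bits at a time; the final ISD::ADD applies whichever offset was computed to the thread base. A sketch of the local-exec arithmetic:

    #include <cstdint>

    // MOVZ #:tprel_g1: (shifted left 16) then MOVK #:tprel_g0_nc:,
    // added to the thread pointer read from TPIDR_EL0.
    uint64_t localExecAddress(uint64_t ThreadBase, uint16_t G1, uint16_t G0) {
      uint64_t TPOff = ((uint64_t)G1 << 16) | G0;
      return ThreadBase + TPOff;
    }
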
> -static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) {
> - SDLoc dl(Op);
> - EVT VT = Op.getValueType();
> - SDValue Vec = Op.getOperand(0);
> - unsigned Opc = IsSigned ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
> -
> - if (VT.getVectorNumElements() == 1) {
> - assert(VT == MVT::v1f64 && "Unexpected vector type!");
> - if (VT.getSizeInBits() == Vec.getValueSizeInBits())
> - return Op;
> - return DAG.UnrollVectorOp(Op.getNode());
> - }
> -
> - if (VT.getSizeInBits() < Vec.getValueSizeInBits()) {
> - assert(Vec.getValueType() == MVT::v2i64 && VT == MVT::v2f32 &&
> - "Unexpected vector type!");
> - Vec = DAG.getNode(Opc, dl, MVT::v2f64, Vec);
> - return DAG.getNode(ISD::FP_ROUND, dl, VT, Vec, DAG.getIntPtrConstant(0));
> - } else if (VT.getSizeInBits() > Vec.getValueSizeInBits()) {
> - unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
> - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
> - VT.getVectorElementType().getSizeInBits());
> - CastVT =
> - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
> - Vec = DAG.getNode(CastOpc, dl, CastVT, Vec);
> - }
> -
> - return DAG.getNode(Opc, dl, VT, Vec);
> -}
> -
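The two size-mismatch cases above have simple scalar analogues for the signed flavour: a too-wide source converts at the wider FP type and then rounds, while a too-narrow source is integer-extended first.

    #include <cstdint>

    float narrowConvert(int64_t X) {
      return (float)(double)X;  // convert at f64, then the FP_ROUND
    }
    float widenConvert(int16_t X) {
      return (float)(int32_t)X; // SIGN_EXTEND (or ZERO_EXTEND) first
    }
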
> -SDValue
> -AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) const {
> - if (Op.getValueType().isVector())
> - return LowerVectorINT_TO_FP(Op, DAG, IsSigned);
> - if (Op.getValueType() != MVT::f128) {
> - // Legal for everything except f128.
> - return Op;
> - }
> -
> - RTLIB::Libcall LC;
> - if (IsSigned)
> - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
> - else
> - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - return LowerF128ToCall(Op, DAG, LC);
> -}
> -
> -
> -SDValue
> -AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
> - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
> - SDLoc dl(JT);
> - EVT PtrVT = getPointerTy();
> -
> - // When compiling PIC, jump tables get put in the code section, so a static
> - // relocation style is acceptable in both cases.
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
> - AArch64II::MO_LO12),
> - DAG.getConstant(1, MVT::i32));
> - case CodeModel::Large:
> - return DAG.getNode(
> - AArch64ISD::WrapperLarge, dl, PtrVT,
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -// (SELECT testbit, iftrue, iffalse)
> -SDValue
> -AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue TheBit = Op.getOperand(0);
> - SDValue IfTrue = Op.getOperand(1);
> - SDValue IfFalse = Op.getOperand(2);
> -
> - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
> - // that as the consumer we are responsible for ignoring rubbish in higher
> - // bits.
> - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
> - DAG.getConstant(1, MVT::i32));
> - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
> - DAG.getConstant(0, TheBit.getValueType()),
> - DAG.getCondCode(ISD::SETNE));
> -
> - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
> - A64CMP, IfTrue, IfFalse,
> - DAG.getConstant(A64CC::NE, MVT::i32));
> -}
> -
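Since only bit 0 of the boolean operand is defined, the lowering masks with 1 before comparing. The scalar equivalent of the AND/SETCC/SELECT_CC chain:

    #include <cstdint>

    int selectOnBit0(uint32_t TheBit, int IfTrue, int IfFalse) {
      return ((TheBit & 1) != 0) ? IfTrue : IfFalse; // AND #1, then test NE 0
    }
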
> -static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
> - SDLoc DL(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
> - EVT VT = Op.getValueType();
> - bool Invert = false;
> - SDValue Op0, Op1;
> - unsigned Opcode;
> -
> - if (LHS.getValueType().isInteger()) {
> -
> - // Attempt to use Vector Integer Compare Mask Test instruction.
> - // TST = icmp ne (and (op0, op1), zero).
> - if (CC == ISD::SETNE) {
> - if (((LHS.getOpcode() == ISD::AND) &&
> - ISD::isBuildVectorAllZeros(RHS.getNode())) ||
> - ((RHS.getOpcode() == ISD::AND) &&
> - ISD::isBuildVectorAllZeros(LHS.getNode()))) {
> -
> - SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
> - SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
> - SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
> - return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
> - }
> - }
> -
> - // Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
> - // Note: Compare against Zero does not support unsigned predicates.
> - if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
> - ISD::isBuildVectorAllZeros(LHS.getNode())) &&
> - !isUnsignedIntSetCC(CC)) {
> -
> - // If LHS is the zero value, swap operands and CondCode.
> - if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
> - CC = getSetCCSwappedOperands(CC);
> - Op0 = RHS;
> - } else
> - Op0 = LHS;
> -
> - // Ensure valid CondCode for Compare Mask against Zero instruction:
> - // EQ, GE, GT, LE, LT.
> - if (ISD::SETNE == CC) {
> - Invert = true;
> - CC = ISD::SETEQ;
> - }
> -
> - // Use the constant's type to differentiate integer and FP compares with zero.
> - Op1 = DAG.getConstant(0, MVT::i32);
> - Opcode = AArch64ISD::NEON_CMPZ;
> -
> - } else {
> - // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
> - // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
> - bool Swap = false;
> - switch (CC) {
> - default:
> - llvm_unreachable("Illegal integer comparison.");
> - case ISD::SETEQ:
> - case ISD::SETGT:
> - case ISD::SETGE:
> - case ISD::SETUGT:
> - case ISD::SETUGE:
> - break;
> - case ISD::SETNE:
> - Invert = true;
> - CC = ISD::SETEQ;
> - break;
> - case ISD::SETULT:
> - case ISD::SETULE:
> - case ISD::SETLT:
> - case ISD::SETLE:
> - Swap = true;
> - CC = getSetCCSwappedOperands(CC);
> - }
> -
> - if (Swap)
> - std::swap(LHS, RHS);
> -
> - Opcode = AArch64ISD::NEON_CMP;
> - Op0 = LHS;
> - Op1 = RHS;
> - }
> -
> - // Generate Compare Mask instr or Compare Mask against Zero instr.
> - SDValue NeonCmp =
> - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
> -
> - if (Invert)
> - NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
> -
> - return NeonCmp;
> - }
> -
> - // Now handle Floating Point cases.
> - // Attempt to use Vector Floating Point Compare Mask against Zero instruction.
> - if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
> - ISD::isBuildVectorAllZeros(LHS.getNode())) {
> -
> - // If LHS is the zero value, swap operands and CondCode.
> - if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
> - CC = getSetCCSwappedOperands(CC);
> - Op0 = RHS;
> - } else
> - Op0 = LHS;
> -
> - // Use the constant's type to differentiate integer and FP compares with zero.
> - Op1 = DAG.getConstantFP(0, MVT::f32);
> - Opcode = AArch64ISD::NEON_CMPZ;
> - } else {
> - // Attempt to use Vector Floating Point Compare Mask instruction.
> - Op0 = LHS;
> - Op1 = RHS;
> - Opcode = AArch64ISD::NEON_CMP;
> - }
> -
> - SDValue NeonCmpAlt;
> - // Some register compares have to be implemented with swapped CC and operands,
> - // e.g.: OLT implemented as OGT with swapped operands.
> - bool SwapIfRegArgs = false;
> -
> - // Ensure valid CondCode for FP Compare Mask against Zero instruction:
> - // EQ, GE, GT, LE, LT.
> - // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
> - switch (CC) {
> - default:
> - llvm_unreachable("Illegal FP comparison");
> - case ISD::SETUNE:
> - case ISD::SETNE:
> - Invert = true; // Fallthrough
> - case ISD::SETOEQ:
> - case ISD::SETEQ:
> - CC = ISD::SETEQ;
> - break;
> - case ISD::SETOLT:
> - case ISD::SETLT:
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETOGT:
> - case ISD::SETGT:
> - CC = ISD::SETGT;
> - break;
> - case ISD::SETOLE:
> - case ISD::SETLE:
> - CC = ISD::SETLE;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETOGE:
> - case ISD::SETGE:
> - CC = ISD::SETGE;
> - break;
> - case ISD::SETUGE:
> - Invert = true;
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETULE:
> - Invert = true;
> - CC = ISD::SETGT;
> - break;
> - case ISD::SETUGT:
> - Invert = true;
> - CC = ISD::SETLE;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETULT:
> - Invert = true;
> - CC = ISD::SETGE;
> - break;
> - case ISD::SETUEQ:
> - Invert = true; // Fallthrough
> - case ISD::SETONE:
> - // Expand this to (OGT | OLT).
> - NeonCmpAlt =
> - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETUO:
> - Invert = true; // Fallthrough
> - case ISD::SETO:
> - // Expand this to (OGE | OLT).
> - NeonCmpAlt =
> - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - }
> -
> - if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
> - CC = getSetCCSwappedOperands(CC);
> - std::swap(Op0, Op1);
> - }
> -
> - // Generate FP Compare Mask instr or FP Compare Mask against Zero instr
> - SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
> -
> - if (NeonCmpAlt.getNode())
> - NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
> -
> - if (Invert)
> - NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
> -
> - return NeonCmp;
> -}
> -
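The Invert flags in the FP switch above rely on IEEE comparison semantics: each unordered predicate is exactly the negation of the complementary ordered one, so a NOT after the ordered compare is sound even for NaNs. Scalar sketch:

    // "A < B" is false when either operand is NaN, so its negation is
    // "unordered or A >= B", which is SETUGE. Likewise for SETULE.
    bool uge(float A, float B) { return !(A < B); } // SETUGE == NOT SETOLT
    bool ule(float A, float B) { return !(A > B); } // SETULE == NOT SETOGT
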
> -// (SETCC lhs, rhs, condcode)
> -SDValue
> -AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
> - EVT VT = Op.getValueType();
> -
> - if (VT.isVector())
> - return LowerVectorSETCC(Op, DAG);
> -
> - if (LHS.getValueType() == MVT::f128) {
> - // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
> - // for the rest of the function (some i32 or i64 values).
> - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
> -
> - // If softenSetCCOperands returned a scalar, use it.
> - if (!RHS.getNode()) {
> - assert(LHS.getValueType() == Op.getValueType() &&
> - "Unexpected setcc expansion!");
> - return LHS;
> - }
> - }
> -
> - if (LHS.getValueType().isInteger()) {
> - SDValue A64cc;
> -
> - // Integers are handled in a separate function because the combinations of
> - // immediates and tests can get hairy and we may want to fiddle with things.
> - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
> -
> - return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
> - CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
> - A64cc);
> - }
> -
> - // Note that some LLVM floating-point CondCodes can't be lowered to a single
> - // conditional branch, hence FPCCToA64CC can set a second test, where
> - // passing either one is sufficient.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
> - CmpOp, DAG.getConstant(1, VT),
> - DAG.getConstant(0, VT), A64cc);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
> - }
> -
> - return A64SELECT_CC;
> -}
> -
> -static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) {
> - SDLoc dl(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - SDValue IfTrue = Op.getOperand(2);
> - SDValue IfFalse = Op.getOperand(3);
> - EVT IfTrueVT = IfTrue.getValueType();
> - EVT CondVT = IfTrueVT.changeVectorElementTypeToInteger();
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
> -
> - // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we
> - // will use a NEON compare.
> - if ((LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64)) {
> - EVT EltVT = LHS.getValueType();
> - unsigned EltNum = 128 / EltVT.getSizeInBits();
> - EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum);
> - unsigned SubConstant =
> - (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 : AArch64::sub_64;
> - EVT CEltT = (LHS.getValueType() == MVT::f32) ? MVT::i32 : MVT::i64;
> - EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum);
> -
> - LHS
> - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
> - VT, DAG.getTargetConstant(0, MVT::i32), LHS,
> - DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
> - RHS
> - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
> - VT, DAG.getTargetConstant(0, MVT::i32), RHS,
> - DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
> -
> - SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC);
> - SDValue ResCC = LowerVectorSETCC(VSetCC, DAG);
> - if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) {
> - EVT DUPVT =
> - EVT::getVectorVT(*DAG.getContext(), CEltT,
> - IfTrueVT.getSizeInBits() / CEltT.getSizeInBits());
> - ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC,
> - DAG.getConstant(0, MVT::i64, false));
> -
> - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC);
> - } else {
> - // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function
> - // can't handle them and will hit this assert.
> - assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() &&
> - "Vector of IfTrue & IfFalse is too small.");
> -
> - unsigned ExEltNum =
> - EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits();
> - EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum);
> - ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC,
> - DAG.getConstant(0, MVT::i64, false));
> - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC);
> - }
> - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
> - ResCC, IfTrue, IfFalse);
> - return VSelect;
> - }
> -
> - // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are
> - // vectors.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - EVT SEVT = MVT::i32;
> - if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32)
> - SEVT = MVT::i64;
> - SDValue AllOne = DAG.getConstant(-1, SEVT);
> - SDValue AllZero = DAG.getConstant(0, SEVT);
> - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
> - AllOne, AllZero, A64cc);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
> - SetCC, AllOne, A64SELECT_CC, A64cc);
> - }
> - SDValue VDup;
> - if (IfTrue.getValueType().getVectorNumElements() == 1)
> - VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, CondVT, A64SELECT_CC);
> - else
> - VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, CondVT, A64SELECT_CC);
> - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
> - VDup, IfTrue, IfFalse);
> - return VSelect;
> -}
> -
> -// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
> -SDValue
> -AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - SDValue IfTrue = Op.getOperand(2);
> - SDValue IfFalse = Op.getOperand(3);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
> -
> - if (IfTrue.getValueType().isVector())
> - return LowerVectorSELECT_CC(Op, DAG);
> -
> - if (LHS.getValueType() == MVT::f128) {
> - // f128 comparisons are lowered to libcalls, but slot in nicely here
> - // afterwards.
> - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
> -
> - // If softenSetCCOperands returned a scalar, we need to compare the result
> - // against zero to select between true and false values.
> - if (!RHS.getNode()) {
> - RHS = DAG.getConstant(0, LHS.getValueType());
> - CC = ISD::SETNE;
> - }
> - }
> -
> - if (LHS.getValueType().isInteger()) {
> - SDValue A64cc;
> -
> - // Integers are handled in a separate function because the combinations of
> - // immediates and tests can get hairy and we may want to fiddle with things.
> - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
> -
> - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp,
> - IfTrue, IfFalse, A64cc);
> - }
> -
> - // Note that some LLVM floating-point CondCodes can't be lowered to a single
> - // conditional branch, hence FPCCToA64CC can set a second test, where
> - // passing either one is sufficient.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
> - Op.getValueType(),
> - SetCC, IfTrue, IfFalse, A64cc);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
> - SetCC, IfTrue, A64SELECT_CC, A64cc);
> -
> - }
> -
> - return A64SELECT_CC;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
> - const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
> - const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
> -
> - // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
> - // rather than just 8.
> - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
> - Op.getOperand(1), Op.getOperand(2),
> - DAG.getConstant(32, MVT::i32), 8, false, false,
> - MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
> - // The layout of the va_list struct is specified in the AArch64 Procedure Call
> - // Standard, section B.3.
> - MachineFunction &MF = DAG.getMachineFunction();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> - SDLoc DL(Op);
> -
> - SDValue Chain = Op.getOperand(0);
> - SDValue VAList = Op.getOperand(1);
> - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
> - SmallVector<SDValue, 4> MemOps;
> -
> - // void *__stack at offset 0
> - SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
> - getPointerTy());
> - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
> - MachinePointerInfo(SV), false, false, 0));
> -
> - // void *__gr_top at offset 8
> - int GPRSize = FuncInfo->getVariadicGPRSize();
> - if (GPRSize > 0) {
> - SDValue GRTop, GRTopAddr;
> -
> - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(8, getPointerTy()));
> -
> - GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
> - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
> - DAG.getConstant(GPRSize, getPointerTy()));
> -
> - MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
> - MachinePointerInfo(SV, 8),
> - false, false, 0));
> - }
> -
> - // void *__vr_top at offset 16
> - int FPRSize = FuncInfo->getVariadicFPRSize();
> - if (FPRSize > 0) {
> - SDValue VRTop, VRTopAddr;
> - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(16, getPointerTy()));
> -
> - VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
> - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
> - DAG.getConstant(FPRSize, getPointerTy()));
> -
> - MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
> - MachinePointerInfo(SV, 16),
> - false, false, 0));
> - }
> -
> - // int __gr_offs at offset 24
> - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(24, getPointerTy()));
> - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
> - GROffsAddr, MachinePointerInfo(SV, 24),
> - false, false, 0));
> -
> - // int __vr_offs at offset 28
> - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(28, getPointerTy()));
> - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
> - VROffsAddr, MachinePointerInfo(SV, 28),
> - false, false, 0));
> -
> - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
> -}
> -
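The offsets stored above, and the 32-byte memcpy in LowerVACOPY, match the AAPCS64 (section B.3) va_list layout, roughly this struct (the struct name is illustrative):

    #include <cstdint>

    struct AArch64VaList {  // 8 + 8 + 8 + 4 + 4 = 32 bytes
      void *__stack;        // offset 0
      void *__gr_top;       // offset 8
      void *__vr_top;       // offset 16
      int32_t __gr_offs;    // offset 24
      int32_t __vr_offs;    // offset 28
    };
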
> -SDValue
> -AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> - switch (Op.getOpcode()) {
> - default: llvm_unreachable("Don't know how to custom lower this!");
> - case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
> - case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
> - case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
> - case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
> - case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
> - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
> - case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
> - case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
> - case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
> - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
> - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
> - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
> -
> - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
> - case ISD::SRL_PARTS:
> - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
> -
> - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
> - case ISD::BRCOND: return LowerBRCOND(Op, DAG);
> - case ISD::BR_CC: return LowerBR_CC(Op, DAG);
> - case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
> - case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
> - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
> - case ISD::JumpTable: return LowerJumpTable(Op, DAG);
> - case ISD::SELECT: return LowerSELECT(Op, DAG);
> - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> - case ISD::SETCC: return LowerSETCC(Op, DAG);
> - case ISD::VACOPY: return LowerVACOPY(Op, DAG);
> - case ISD::VASTART: return LowerVASTART(Op, DAG);
> - case ISD::BUILD_VECTOR:
> - return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
> - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
> - case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
> - }
> -
> - return SDValue();
> -}
> -
> -/// Check if the specified splat value corresponds to a valid vector constant
> -/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
> -/// so, return the encoded 8-bit immediate and the OpCmode instruction field
> -/// values.
> -static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
> - unsigned SplatBitSize, SelectionDAG &DAG,
> - bool is128Bits, NeonModImmType type, EVT &VT,
> - unsigned &Imm, unsigned &OpCmode) {
> - switch (SplatBitSize) {
> - default:
> - llvm_unreachable("unexpected size for isNeonModifiedImm");
> - case 8: {
> - if (type != Neon_Mov_Imm)
> - return false;
> - assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
> - // Neon movi per byte: Op=0, Cmode=1110.
> - OpCmode = 0xe;
> - Imm = SplatBits;
> - VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
> - break;
> - }
> - case 16: {
> - // Neon move inst per halfword
> - VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
> - if ((SplatBits & ~0xff) == 0) {
> - // Value = 0x00nn is 0x00nn LSL 0
> - // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
> - // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
> - // Op=x, Cmode=100y
> - Imm = SplatBits;
> - OpCmode = 0x8;
> - break;
> - }
> - if ((SplatBits & ~0xff00) == 0) {
> - // Value = 0xnn00 is 0x00nn LSL 8
> - // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
> - // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
> - // Op=x, Cmode=101x
> - Imm = SplatBits >> 8;
> - OpCmode = 0xa;
> - break;
> - }
> - // can't handle any other
> - return false;
> - }
> -
> - case 32: {
> - // First the LSL variants (some interested instructions can't use MSL).
> -
> - // Neon move instr per word, shift zeros
> - VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
> - if ((SplatBits & ~0xff) == 0) {
> - // Value = 0x000000nn is 0x000000nn LSL 0
> - // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
> - // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
> - // Op=x, Cmode=000x
> - Imm = SplatBits;
> - OpCmode = 0;
> - break;
> - }
> - if ((SplatBits & ~0xff00) == 0) {
> - // Value = 0x0000nn00 is 0x000000nn LSL 8
> - // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
> - // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
> - // Op=x, Cmode=001x
> - Imm = SplatBits >> 8;
> - OpCmode = 0x2;
> - break;
> - }
> - if ((SplatBits & ~0xff0000) == 0) {
> - // Value = 0x00nn0000 is 0x000000nn LSL 16
> - // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
> - // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
> - // Op=x, Cmode=010x
> - Imm = SplatBits >> 16;
> - OpCmode = 0x4;
> - break;
> - }
> - if ((SplatBits & ~0xff000000) == 0) {
> - // Value = 0xnn000000 is 0x000000nn LSL 24
> - // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
> - // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
> - // Op=x, Cmode=011x
> - Imm = SplatBits >> 24;
> - OpCmode = 0x6;
> - break;
> - }
> -
> - // Now the MSL immediates.
> -
> - // Neon move instr per word, shift ones
> - if ((SplatBits & ~0xffff) == 0 &&
> - ((SplatBits | SplatUndef) & 0xff) == 0xff) {
> - // Value = 0x0000nnff is 0x000000nn MSL 8
> - // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
> - // Op=x, Cmode=1100
> - Imm = SplatBits >> 8;
> - OpCmode = 0xc;
> - break;
> - }
> - if ((SplatBits & ~0xffffff) == 0 &&
> - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
> - // Value = 0x00nnffff is 0x000000nn MSL 16
> - // movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
> - // Op=x, Cmode=1101
> - Imm = SplatBits >> 16;
> - OpCmode = 0xd;
> - break;
> - }
> - // can't handle any other
> - return false;
> - }
> -
> - case 64: {
> - if (type != Neon_Mov_Imm)
> - return false;
> - // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
> - // movi Op=1, Cmode=1110.
> - OpCmode = 0x1e;
> - uint64_t BitMask = 0xff;
> - uint64_t Val = 0;
> - unsigned ImmMask = 1;
> - Imm = 0;
> - for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
> - if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
> - Val |= BitMask;
> - Imm |= ImmMask;
> - } else if ((SplatBits & BitMask) != 0) {
> - return false;
> - }
> - BitMask <<= 8;
> - ImmMask <<= 1;
> - }
> - SplatBits = Val;
> - VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
> - break;
> - }
> - }
> -
> - return true;
> -}
> -
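In the 64-bit case the encoded immediate is a per-byte mask that the hardware expands back out, each set bit becoming an 0xff byte. The inverse of the loop above, as a sketch:

    #include <cstdint>

    uint64_t expandByteMask(uint8_t Imm) {
      uint64_t Val = 0;
      for (int ByteNum = 0; ByteNum < 8; ++ByteNum)
        if (Imm & (1u << ByteNum))
          Val |= 0xffULL << (8 * ByteNum); // Imm bit N -> byte N = 0xff
      return Val;
    }
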
> -static SDValue PerformANDCombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> -
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - // We're looking for an SRL/AND pair which forms a UBFX.
> -
> - if (VT != MVT::i32 && VT != MVT::i64)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(N->getOperand(1)))
> - return SDValue();
> -
> - uint64_t TruncMask = N->getConstantOperandVal(1);
> - if (!isMask_64(TruncMask))
> - return SDValue();
> -
> - uint64_t Width = CountPopulation_64(TruncMask);
> - SDValue Shift = N->getOperand(0);
> -
> - if (Shift.getOpcode() != ISD::SRL)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(Shift->getOperand(1)))
> - return SDValue();
> - uint64_t LSB = Shift->getConstantOperandVal(1);
> -
> - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
> - return SDValue();
> -
> - return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
> - DAG.getConstant(LSB, MVT::i64),
> - DAG.getConstant(LSB + Width - 1, MVT::i64));
> -}
> -
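The UBFX node produced above is an unsigned bitfield extract; in C terms it computes exactly the (and (srl x, LSB), mask) pattern being matched. A semantic sketch, not the instruction's immediate encoding:

    #include <cstdint>

    uint64_t ubfx(uint64_t X, unsigned LSB, unsigned Width) {
      uint64_t Mask = Width == 64 ? ~0ULL : ((1ULL << Width) - 1);
      return (X >> LSB) & Mask;
    }
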
> -/// For a true bitfield insert, the bits getting into that contiguous mask
> -/// should come from the low part of an existing value: they must be formed
> -/// from a compatible SHL operation (unless they're already low). This
> -/// function checks that condition and returns the intended least-significant
> -/// bit. If the operation is not a field preparation, -1 is returned.
> -static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
> - SDValue &MaskedVal, uint64_t Mask) {
> - if (!isShiftedMask_64(Mask))
> - return -1;
> -
> - // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
> - // instruction. BFI will do a left-shift by LSB before applying the mask we've
> - // spotted, so in general we should pre-emptively "undo" that by making sure
> - // the incoming bits have had a right-shift applied to them.
> - //
> - // This right shift, however, will combine with existing left/right shifts. In
> - // the simplest case of a completely straight bitfield operation, it will be
> - // expected to completely cancel out with an existing SHL. More complicated
> - // cases (e.g. bitfield to bitfield copy) may still need a real shift before
> - // the BFI.
> -
> - uint64_t LSB = countTrailingZeros(Mask);
> - int64_t ShiftRightRequired = LSB;
> - if (MaskedVal.getOpcode() == ISD::SHL &&
> - isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
> - ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
> - MaskedVal = MaskedVal.getOperand(0);
> - } else if (MaskedVal.getOpcode() == ISD::SRL &&
> - isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
> - ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
> - MaskedVal = MaskedVal.getOperand(0);
> - }
> -
> - if (ShiftRightRequired > 0)
> - MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
> - DAG.getConstant(ShiftRightRequired, MVT::i64));
> - else if (ShiftRightRequired < 0) {
> - // We could actually end up with a residual left shift, for example with
> - // "struc.bitfield = val << 1".
> - MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
> - DAG.getConstant(-ShiftRightRequired, MVT::i64));
> - }
> -
> - return LSB;
> -}
> -
> -/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
> -/// a mask and an extension. Returns true if a BFI was found and provides
> -/// information on its surroundings.
> -static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
> - bool &Extended) {
> - Extended = false;
> - if (N.getOpcode() == ISD::ZERO_EXTEND) {
> - Extended = true;
> - N = N.getOperand(0);
> - }
> -
> - if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
> - Mask = N->getConstantOperandVal(1);
> - N = N.getOperand(0);
> - } else {
> - // Mask is the whole width.
> - Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
> - }
> -
> - if (N.getOpcode() == AArch64ISD::BFI) {
> - BFI = N;
> - return true;
> - }
> -
> - return false;
> -}
> -
> -/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
> -/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
> -/// can often be further combined with a larger mask. Ultimately, we want mask
> -/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
> -static SDValue tryCombineToBFI(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *Subtarget) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - assert(N->getOpcode() == ISD::OR && "Unexpected root");
> -
> - // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
> - // abandon the effort.
> - SDValue LHS = N->getOperand(0);
> - if (LHS.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - uint64_t LHSMask;
> - if (isa<ConstantSDNode>(LHS.getOperand(1)))
> - LHSMask = LHS->getConstantOperandVal(1);
> - else
> - return SDValue();
> -
> - // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
> - // is or abandon the effort.
> - SDValue RHS = N->getOperand(1);
> - if (RHS.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - uint64_t RHSMask;
> - if (isa<ConstantSDNode>(RHS.getOperand(1)))
> - RHSMask = RHS->getConstantOperandVal(1);
> - else
> - return SDValue();
> -
> - // Can't do anything if the masks are incompatible.
> - if (LHSMask & RHSMask)
> - return SDValue();
> -
> - // Now we need one of the masks to be a contiguous field. Without loss of
> - // generality that should be the RHS one.
> - SDValue Bitfield = LHS.getOperand(0);
> - if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
> - // We know that LHS is a candidate new value, and RHS isn't already a better
> - // one.
> - std::swap(LHS, RHS);
> - std::swap(LHSMask, RHSMask);
> - }
> -
> - // We've done our best to put the right operands in the right places, all we
> - // can do now is check whether a BFI exists.
> - Bitfield = RHS.getOperand(0);
> - int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
> - if (LSB == -1)
> - return SDValue();
> -
> - uint32_t Width = CountPopulation_64(RHSMask);
> - assert(Width && "Expected non-zero bitfield width");
> -
> - SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
> - LHS.getOperand(0), Bitfield,
> - DAG.getConstant(LSB, MVT::i64),
> - DAG.getConstant(Width, MVT::i64));
> -
> - // Mask is trivial
> - if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
> - return BFI;
> -
> - return DAG.getNode(ISD::AND, DL, VT, BFI,
> - DAG.getConstant(LHSMask | RHSMask, VT));
> -}
> -
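For reference, the BFI node built here behaves as a masked insert: the low Width bits of the second value replace the field starting at LSB in the first. A semantic sketch:

    #include <cstdint>

    uint64_t bfi(uint64_t Dst, uint64_t Src, unsigned LSB, unsigned Width) {
      uint64_t Field = (Width == 64 ? ~0ULL : ((1ULL << Width) - 1)) << LSB;
      return (Dst & ~Field) | ((Src << LSB) & Field);
    }
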
> -/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
> -/// original input. This is surprisingly common because SROA splits things up
> -/// into i8 chunks, so the originally detected MaskedBFI may actually only act
> -/// on the low (say) byte of a word. This is then ORed into the rest of the
> -/// word afterwards.
> -///
> -/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
> -///
> -/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
> -/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
> -/// involved.
> -static SDValue tryCombineToLargerBFI(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *Subtarget) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - // First job is to hunt for a MaskedBFI on either the left or right. Swap
> - // operands if it's actually on the right.
> - SDValue BFI;
> - SDValue PossExtraMask;
> - uint64_t ExistingMask = 0;
> - bool Extended = false;
> - if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
> - PossExtraMask = N->getOperand(1);
> - else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
> - PossExtraMask = N->getOperand(0);
> - else
> - return SDValue();
> -
> - // We can only combine a BFI with another compatible mask.
> - if (PossExtraMask.getOpcode() != ISD::AND ||
> - !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
> - return SDValue();
> -
> - uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
> -
> - // Masks must be compatible.
> - if (ExtraMask & ExistingMask)
> - return SDValue();
> -
> - SDValue OldBFIVal = BFI.getOperand(0);
> - SDValue NewBFIVal = BFI.getOperand(1);
> - if (Extended) {
> - // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
> - // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
> - // need to be made compatible.
> - assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
> - && "Invalid types for BFI");
> - OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
> - NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
> - }
> -
> - // We need the MaskedBFI to be combined with a mask of the *same* value.
> - if (PossExtraMask.getOperand(0) != OldBFIVal)
> - return SDValue();
> -
> - BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
> - OldBFIVal, NewBFIVal,
> - BFI.getOperand(2), BFI.getOperand(3));
> -
> - // If the masking is trivial, we don't need to create it.
> - if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
> - return BFI;
> -
> - return DAG.getNode(ISD::AND, DL, VT, BFI,
> - DAG.getConstant(ExtraMask | ExistingMask, VT));
> -}
> -
> -/// An EXTR instruction is made up of two shifts, ORed together. This helper
> -/// searches for and classifies those shifts.
> -static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
> - bool &FromHi) {
> - if (N.getOpcode() == ISD::SHL)
> - FromHi = false;
> - else if (N.getOpcode() == ISD::SRL)
> - FromHi = true;
> - else
> - return false;
> -
> - if (!isa<ConstantSDNode>(N.getOperand(1)))
> - return false;
> -
> - ShiftAmount = N->getConstantOperandVal(1);
> - Src = N->getOperand(0);
> - return true;
> -}
> -
> -/// EXTR instruction extracts a contiguous chunk of bits from two existing
> -/// registers viewed as a high/low pair. This function looks for the pattern:
> -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
> -/// EXTR. Can't quite be done in TableGen because the two immediates aren't
> -/// independent.
> -static SDValue tryCombineToEXTR(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - assert(N->getOpcode() == ISD::OR && "Unexpected root");
> -
> - if (VT != MVT::i32 && VT != MVT::i64)
> - return SDValue();
> -
> - SDValue LHS;
> - uint32_t ShiftLHS = 0;
> - bool LHSFromHi = false;
> - if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
> - return SDValue();
> -
> - SDValue RHS;
> - uint32_t ShiftRHS = 0;
> - bool RHSFromHi = false;
> - if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
> - return SDValue();
> -
> - // If they're both trying to come from the high part of the register, they're
> - // not really an EXTR.
> - if (LHSFromHi == RHSFromHi)
> - return SDValue();
> -
> - if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
> - return SDValue();
> -
> - if (LHSFromHi) {
> - std::swap(LHS, RHS);
> - std::swap(ShiftLHS, ShiftRHS);
> - }
> -
> - return DAG.getNode(AArch64ISD::EXTR, DL, VT,
> - LHS, RHS,
> - DAG.getConstant(ShiftRHS, MVT::i64));
> -}
> -
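EXTR's semantics explain the ShiftLHS + ShiftRHS == register-width check: the result is a register-sized window into the concatenation hi:lo. Sketch for the 64-bit case:

    #include <cstdint>

    // extr(Hi, Lo, LSB) == bits [LSB+63 : LSB] of the 128-bit value Hi:Lo.
    uint64_t extr(uint64_t Hi, uint64_t Lo, unsigned LSB) {
      return LSB == 0 ? Lo : (Hi << (64 - LSB)) | (Lo >> LSB);
    }
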
> -/// Target-specific dag combine xforms for ISD::OR
> -static SDValue PerformORCombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *Subtarget) {
> -
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
> - return SDValue();
> -
> - // Attempt to recognise bitfield-insert operations.
> - SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
> - if (Res.getNode())
> - return Res;
> -
> - // Attempt to combine an existing MaskedBFI operation into one with a larger
> - // mask.
> - Res = tryCombineToLargerBFI(N, DCI, Subtarget);
> - if (Res.getNode())
> - return Res;
> -
> - Res = tryCombineToEXTR(N, DCI);
> - if (Res.getNode())
> - return Res;
> -
> - if (!Subtarget->hasNEON())
> - return SDValue();
> -
> - // Attempt to use vector immediate-form BSL
> - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
> -
> - SDValue N0 = N->getOperand(0);
> - if (N0.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - SDValue N1 = N->getOperand(1);
> - if (N1.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
> - APInt SplatUndef;
> - unsigned SplatBitSize;
> - bool HasAnyUndefs;
> - BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
> - APInt SplatBits0;
> - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
> - HasAnyUndefs) &&
> - !HasAnyUndefs) {
> - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
> - APInt SplatBits1;
> - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
> - HasAnyUndefs) && !HasAnyUndefs &&
> - SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
> - SplatBits0 == ~SplatBits1) {
> -
> - return DAG.getNode(ISD::VSELECT, DL, VT, N0->getOperand(1),
> - N0->getOperand(0), N1->getOperand(0));
> - }
> - }
> - }
> -
> - return SDValue();
> -}
> -
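The VSELECT formed at the end is Neon's bitwise select (BSL): each set bit of the mask takes the corresponding bit from the first operand, each clear bit from the second. Per-word sketch of the pattern in the comment:

    #include <cstdint>

    // (or (and B, A), (and C, ~A)): A's one-bits pick B, zero-bits pick C.
    uint64_t bsl(uint64_t A, uint64_t B, uint64_t C) {
      return (B & A) | (C & ~A);
    }
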
> -/// Target-specific dag combine xforms for ISD::SRA
> -static SDValue PerformSRACombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> -
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - // We're looking for an SRA/SHL pair which forms an SBFX.
> -
> - if (VT != MVT::i32 && VT != MVT::i64)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(N->getOperand(1)))
> - return SDValue();
> -
> - uint64_t ExtraSignBits = N->getConstantOperandVal(1);
> - SDValue Shift = N->getOperand(0);
> -
> - if (Shift.getOpcode() != ISD::SHL)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(Shift->getOperand(1)))
> - return SDValue();
> -
> - uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
> - uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
> - uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
> -
> - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
> - return SDValue();
> -
> - return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
> - DAG.getConstant(LSB, MVT::i64),
> - DAG.getConstant(LSB + Width - 1, MVT::i64));
> -}
> -
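SBFX is the signed counterpart of UBFX: the selected field is moved down to bit 0 and sign-extended. The SHL/SRA pair matched above is the classic in-register sign-extension idiom; a sketch of the equivalence for the i64 case, assuming ExtraSignBits >= BitsOnLeft:

    #include <cstdint>

    // sbfx(X, LSB, Width): sign-extend the Width-bit field starting at LSB.
    int64_t sbfx(uint64_t X, unsigned LSB, unsigned Width) {
      return (int64_t)(X << (64 - LSB - Width)) >> (64 - Width);
    }
    // (sra (shl X, B), E) == sbfx(X, E - B, 64 - E), matching the
    // Width/LSB arithmetic in the combine above.
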
> -/// Check if this is a valid build_vector for the immediate operand of
> -/// a vector shift operation, where all the elements of the build_vector
> -/// must have the same constant integer value.
> -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
> - // Ignore bit_converts.
> - while (Op.getOpcode() == ISD::BITCAST)
> - Op = Op.getOperand(0);
> - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
> - APInt SplatBits, SplatUndef;
> - unsigned SplatBitSize;
> - bool HasAnyUndefs;
> - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
> - HasAnyUndefs, ElementBits) ||
> - SplatBitSize > ElementBits)
> - return false;
> - Cnt = SplatBits.getSExtValue();
> - return true;
> -}
> -
> -/// Check if this is a valid build_vector for the immediate operand of
> -/// a vector shift left operation. That value must be in the range:
> -/// 0 <= Value < ElementBits
> -static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
> - assert(VT.isVector() && "vector shift count is not a vector type");
> - unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
> - if (!getVShiftImm(Op, ElementBits, Cnt))
> - return false;
> - return (Cnt >= 0 && Cnt < ElementBits);
> -}
> -
> -/// Check if this is a valid build_vector for the immediate operand of a
> -/// vector shift right operation. The value must be in the range:
> -/// 1 <= Value <= ElementBits
> -static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
> - assert(VT.isVector() && "vector shift count is not a vector type");
> - unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
> - if (!getVShiftImm(Op, ElementBits, Cnt))
> - return false;
> - return (Cnt >= 1 && Cnt <= ElementBits);
> -}
> -
> -static SDValue GenForSextInreg(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - EVT SrcVT, EVT DestVT, EVT SubRegVT,
> - const int *Mask, SDValue Src) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDValue Bitcast
> - = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src);
> - SDValue Sext
> - = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast);
> - SDValue ShuffleVec
> - = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask);
> - SDValue ExtractSubreg
> - = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N),
> - SubRegVT, ShuffleVec,
> - DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0);
> - return ExtractSubreg;
> -}
> -
> -/// Checks for vector shifts and lowers them.
> -static SDValue PerformShiftCombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *ST) {
> - SelectionDAG &DAG = DCI.DAG;
> - EVT VT = N->getValueType(0);
> - if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
> - return PerformSRACombine(N, DCI);
> -
> - // We're looking for an SRA/SHL pair to help generate the instruction
> - // sshll v0.8h, v0.8b, #0
> - // The instruction SXTL is also an alias of this instruction.
> - //
> - // For example, for DAG like below,
> - // v2i32 = sra (v2i32 (shl v2i32, 16)), 16
> - // we can transform it into
> - // v2i32 = EXTRACT_SUBREG
> - // (v4i32 (shuffle_vector
> - // (v4i32 (sext (v4i16 (bitcast v2i32))),
> - // undef, (0, 2, u, u)),
> - // sub_64
> - //
> - // With this transformation we expect to generate "SSHLL + UZIP1"
> - // Sometimes the UZIP1 can be optimized away by combining with surrounding context.
> - int64_t ShrCnt, ShlCnt;
> - if (N->getOpcode() == ISD::SRA
> - && (VT == MVT::v2i32 || VT == MVT::v4i16)
> - && isVShiftRImm(N->getOperand(1), VT, ShrCnt)
> - && N->getOperand(0).getOpcode() == ISD::SHL
> - && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) {
> - SDValue Src = N->getOperand(0).getOperand(0);
> - if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) {
> - // sext_inreg(v2i32, v2i16)
> - // We essentially only care about the Mask {0, 2, u, u}
> - int Mask[4] = {0, 2, 4, 6};
> - return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32,
> - Mask, Src);
> - }
> - else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) {
> - // sext_inreg(v2i32, v2i8)
> - // We essentially only care about the Mask {0, u, 4, u, u, u, u, u, u, u, u, u}
> - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
> - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32,
> - Mask, Src);
> - }
> - else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) {
> - // sext_inreg(v4i16, v4i8)
> - // We essentially only care about the Mask {0, 2, 4, 6, u, u, u, u, u, u, u, u}
> - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
> - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16,
> - Mask, Src);
> - }
> - }
> -
> - // Nothing to be done for scalar shifts.
> - const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> - if (!VT.isVector() || !TLI.isTypeLegal(VT))
> - return SDValue();
> -
> - assert(ST->hasNEON() && "unexpected vector shift");
> - int64_t Cnt;
> -
> - switch (N->getOpcode()) {
> - default:
> - llvm_unreachable("unexpected shift opcode");
> -
> - case ISD::SHL:
> - if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
> - SDValue RHS =
> - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
> - DAG.getConstant(Cnt, MVT::i32));
> - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
> - }
> - break;
> -
> - case ISD::SRA:
> - case ISD::SRL:
> - if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
> - SDValue RHS =
> - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
> - DAG.getConstant(Cnt, MVT::i32));
> - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
> - }
> - break;
> - }
> -
> - return SDValue();
> -}
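For context, the pattern this combine chased is easiest to see in source
form. A minimal C++ sketch (illustrative only, not from the commit; the
function name is made up): sign-extending the low half of each lane with a
shift-left/arithmetic-shift-right pair is exactly the "sra (shl x, 16), 16"
DAG that the code above rewrote to SSHLL + UZP1.

  #include <cstdint>

  // Keep only the low 16 bits of each lane, sign-extended to 32 bits.
  void sext_inreg_v2i32(int32_t v[2]) {
    for (int i = 0; i < 2; ++i)
      v[i] = (int32_t)((uint32_t)v[i] << 16) >> 16;
  }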
> -
> -/// AArch64-specific DAG combining for intrinsics.
> -static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
> - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
> -
> - switch (IntNo) {
> - default:
> - // Don't do anything for most intrinsics.
> - break;
> -
> - case Intrinsic::arm_neon_vqshifts:
> - case Intrinsic::arm_neon_vqshiftu:
> - EVT VT = N->getOperand(1).getValueType();
> - int64_t Cnt;
> - if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
> - break;
> - unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
> - ? AArch64ISD::NEON_QSHLs
> - : AArch64ISD::NEON_QSHLu;
> - return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
> - N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
> - }
> -
> - return SDValue();
> -}
> -
> -/// Target-specific DAG combine function for NEON load/store intrinsics
> -/// to merge base address updates.
> -static SDValue CombineBaseUpdate(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
> - return SDValue();
> -
> - SelectionDAG &DAG = DCI.DAG;
> - bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
> - N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
> - unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
> - SDValue Addr = N->getOperand(AddrOpIdx);
> -
> - // Search for a use of the address operand that is an increment.
> - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
> - UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
> - SDNode *User = *UI;
> - if (User->getOpcode() != ISD::ADD ||
> - UI.getUse().getResNo() != Addr.getResNo())
> - continue;
> -
> - // Check that the add is independent of the load/store. Otherwise, folding
> - // it would create a cycle.
> - if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
> - continue;
> -
> - // Find the new opcode for the updating load/store.
> - bool isLoad = true;
> - bool isLaneOp = false;
> - unsigned NewOpc = 0;
> - unsigned NumVecs = 0;
> - if (isIntrinsic) {
> - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
> - switch (IntNo) {
> - default: llvm_unreachable("unexpected intrinsic for Neon base update");
> - case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD;
> - NumVecs = 1; break;
> - case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD;
> - NumVecs = 2; break;
> - case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD;
> - NumVecs = 3; break;
> - case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD;
> - NumVecs = 4; break;
> - case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD;
> - NumVecs = 1; isLoad = false; break;
> - case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD;
> - NumVecs = 2; isLoad = false; break;
> - case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD;
> - NumVecs = 3; isLoad = false; break;
> - case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD;
> - NumVecs = 4; isLoad = false; break;
> - case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD;
> - NumVecs = 2; break;
> - case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD;
> - NumVecs = 3; break;
> - case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD;
> - NumVecs = 4; break;
> - case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD;
> - NumVecs = 2; isLoad = false; break;
> - case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD;
> - NumVecs = 3; isLoad = false; break;
> - case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD;
> - NumVecs = 4; isLoad = false; break;
> - case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD;
> - NumVecs = 2; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD;
> - NumVecs = 3; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD;
> - NumVecs = 4; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD;
> - NumVecs = 2; isLoad = false; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD;
> - NumVecs = 3; isLoad = false; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD;
> - NumVecs = 4; isLoad = false; isLaneOp = true; break;
> - }
> - } else {
> - isLaneOp = true;
> - switch (N->getOpcode()) {
> - default: llvm_unreachable("unexpected opcode for Neon base update");
> - case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD;
> - NumVecs = 2; break;
> - case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD;
> - NumVecs = 3; break;
> - case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD;
> - NumVecs = 4; break;
> - }
> - }
> -
> - // Find the size of memory referenced by the load/store.
> - EVT VecTy;
> - if (isLoad)
> - VecTy = N->getValueType(0);
> - else
> - VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
> - unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
> - if (isLaneOp)
> - NumBytes /= VecTy.getVectorNumElements();
> -
> - // If the increment is a constant, it must match the memory ref size.
> - SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
> - if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
> - uint32_t IncVal = CInc->getZExtValue();
> - if (IncVal != NumBytes)
> - continue;
> - Inc = DAG.getTargetConstant(IncVal, MVT::i32);
> - }
> -
> - // Create the new updating load/store node.
> - EVT Tys[6];
> - unsigned NumResultVecs = (isLoad ? NumVecs : 0);
> - unsigned n;
> - for (n = 0; n < NumResultVecs; ++n)
> - Tys[n] = VecTy;
> - Tys[n++] = MVT::i64;
> - Tys[n] = MVT::Other;
> - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));
> - SmallVector<SDValue, 8> Ops;
> - Ops.push_back(N->getOperand(0)); // incoming chain
> - Ops.push_back(N->getOperand(AddrOpIdx));
> - Ops.push_back(Inc);
> - for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
> - Ops.push_back(N->getOperand(i));
> - }
> - MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
> - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
> - Ops, MemInt->getMemoryVT(),
> - MemInt->getMemOperand());
> -
> - // Update the uses.
> - std::vector<SDValue> NewResults;
> - for (unsigned i = 0; i < NumResultVecs; ++i) {
> - NewResults.push_back(SDValue(UpdN.getNode(), i));
> - }
> - NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
> - DCI.CombineTo(N, NewResults);
> - DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
> -
> - break;
> - }
> - return SDValue();
> -}
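The access pattern CombineBaseUpdate matched is clearer at the intrinsics
level. A hedged sketch, assuming <arm_neon.h> (the function and its name
are illustrative): a structured load whose pointer is then advanced by
exactly the number of bytes transferred, which is what allows the ADD to
fold into a post-indexed ld2.

  #include <arm_neon.h>

  // vld2q_s32 transfers 2 x 16 bytes; advancing p by 8 int32_t elements
  // (32 bytes) matches NumBytes, so the add can become a post-indexed
  // base update.
  int32x4x2_t load_and_advance(const int32_t *&p) {
    int32x4x2_t v = vld2q_s32(p);
    p += 8;
    return v;
  }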
> -
> -/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1)
> -/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs.
> -/// If so, combine them to a vldN-dup operation and return true.
> -static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
> - SelectionDAG &DAG = DCI.DAG;
> - EVT VT = N->getValueType(0);
> -
> - // Check if the VDUPLANE operand is a vldN-dup intrinsic.
> - SDNode *VLD = N->getOperand(0).getNode();
> - if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
> - return SDValue();
> - unsigned NumVecs = 0;
> - unsigned NewOpc = 0;
> - unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
> - if (IntNo == Intrinsic::arm_neon_vld2lane) {
> - NumVecs = 2;
> - NewOpc = AArch64ISD::NEON_LD2DUP;
> - } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
> - NumVecs = 3;
> - NewOpc = AArch64ISD::NEON_LD3DUP;
> - } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
> - NumVecs = 4;
> - NewOpc = AArch64ISD::NEON_LD4DUP;
> - } else {
> - return SDValue();
> - }
> -
> - // First check that all the vldN-lane uses are VDUPLANEs and that the lane
> - // numbers match the load.
> - unsigned VLDLaneNo =
> - cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
> - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
> - UI != UE; ++UI) {
> - // Ignore uses of the chain result.
> - if (UI.getUse().getResNo() == NumVecs)
> - continue;
> - SDNode *User = *UI;
> - if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
> - VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
> - return SDValue();
> - }
> -
> - // Create the vldN-dup node.
> - EVT Tys[5];
> - unsigned n;
> - for (n = 0; n < NumVecs; ++n)
> - Tys[n] = VT;
> - Tys[n] = MVT::Other;
> - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs + 1));
> - SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
> - MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
> - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops,
> - VLDMemInt->getMemoryVT(),
> - VLDMemInt->getMemOperand());
> -
> - // Update the uses.
> - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
> - UI != UE; ++UI) {
> - unsigned ResNo = UI.getUse().getResNo();
> - // Ignore uses of the chain result.
> - if (ResNo == NumVecs)
> - continue;
> - SDNode *User = *UI;
> - DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
> - }
> -
> - // Now the vldN-lane intrinsic is dead except for its chain result.
> - // Update uses of the chain.
> - std::vector<SDValue> VLDDupResults;
> - for (unsigned n = 0; n < NumVecs; ++n)
> - VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
> - VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
> - DCI.CombineTo(VLD, VLDDupResults);
> -
> - return SDValue(N, 0);
> -}
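In intrinsic terms, the rewrite corresponds to replacing a lane load whose
every use is a broadcast with the dedicated duplicating load. A rough
sketch (function name illustrative, assumes <arm_neon.h>):

  #include <arm_neon.h>

  // Loading one element of each of two structures and broadcasting it to
  // all lanes is what vld2_dup (the LD2R instruction) does directly.
  int16x4x2_t splat_pair(const int16_t *p) {
    return vld2_dup_s16(p);
  }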
> -
> -// vselect (v1i1 setcc) ->
> -// vselect (v1iXX setcc) (XX is the size of the compared operand type)
> -// FIXME: Currently the type legalizer can't handle VSELECT with a v1i1
> -// condition. Once it can legalize "VSELECT v1i1" correctly, there will be
> -// no need to combine such a VSELECT.
> -static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {
> - SDValue N0 = N->getOperand(0);
> - EVT CCVT = N0.getValueType();
> -
> - if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
> - CCVT.getVectorElementType() != MVT::i1)
> - return SDValue();
> -
> - EVT ResVT = N->getValueType(0);
> - EVT CmpVT = N0.getOperand(0).getValueType();
> - // Only combine when the result type is of the same size as the compared
> - // operands.
> - if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
> - return SDValue();
> -
> - SDValue IfTrue = N->getOperand(1);
> - SDValue IfFalse = N->getOperand(2);
> - SDValue SetCC =
> - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
> - N0.getOperand(0), N0.getOperand(1),
> - cast<CondCodeSDNode>(N0.getOperand(2))->get());
> - return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
> - IfTrue, IfFalse);
> -}
> -
> -// sign_extend (extract_vector_elt (v1i1 setcc)) ->
> -// extract_vector_elt (v1iXX setcc)
> -// (XX is the size of the compared operand type)
> -static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) {
> - SDValue N0 = N->getOperand(0);
> - SDValue Vec = N0.getOperand(0);
> -
> - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
> - Vec.getOpcode() != ISD::SETCC)
> - return SDValue();
> -
> - EVT ResVT = N->getValueType(0);
> - EVT CmpVT = Vec.getOperand(0).getValueType();
> - // Only optimize when the result type is of the same size as the element
> - // type of the compared operand.
> - if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits())
> - return SDValue();
> -
> - SDValue Lane = N0.getOperand(1);
> - SDValue SetCC =
> - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
> - Vec.getOperand(0), Vec.getOperand(1),
> - cast<CondCodeSDNode>(Vec.getOperand(2))->get());
> - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT,
> - SetCC, Lane);
> -}
> -
> -SDValue
> -AArch64TargetLowering::PerformDAGCombine(SDNode *N,
> - DAGCombinerInfo &DCI) const {
> - switch (N->getOpcode()) {
> - default: break;
> - case ISD::AND: return PerformANDCombine(N, DCI);
> - case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
> - case ISD::SHL:
> - case ISD::SRA:
> - case ISD::SRL:
> - return PerformShiftCombine(N, DCI, getSubtarget());
> - case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
> - case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
> - case ISD::INTRINSIC_WO_CHAIN:
> - return PerformIntrinsicCombine(N, DCI.DAG);
> - case AArch64ISD::NEON_VDUPLANE:
> - return CombineVLDDUP(N, DCI);
> - case AArch64ISD::NEON_LD2DUP:
> - case AArch64ISD::NEON_LD3DUP:
> - case AArch64ISD::NEON_LD4DUP:
> - return CombineBaseUpdate(N, DCI);
> - case ISD::INTRINSIC_VOID:
> - case ISD::INTRINSIC_W_CHAIN:
> - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
> - case Intrinsic::arm_neon_vld1:
> - case Intrinsic::arm_neon_vld2:
> - case Intrinsic::arm_neon_vld3:
> - case Intrinsic::arm_neon_vld4:
> - case Intrinsic::arm_neon_vst1:
> - case Intrinsic::arm_neon_vst2:
> - case Intrinsic::arm_neon_vst3:
> - case Intrinsic::arm_neon_vst4:
> - case Intrinsic::arm_neon_vld2lane:
> - case Intrinsic::arm_neon_vld3lane:
> - case Intrinsic::arm_neon_vld4lane:
> - case Intrinsic::aarch64_neon_vld1x2:
> - case Intrinsic::aarch64_neon_vld1x3:
> - case Intrinsic::aarch64_neon_vld1x4:
> - case Intrinsic::aarch64_neon_vst1x2:
> - case Intrinsic::aarch64_neon_vst1x3:
> - case Intrinsic::aarch64_neon_vst1x4:
> - case Intrinsic::arm_neon_vst2lane:
> - case Intrinsic::arm_neon_vst3lane:
> - case Intrinsic::arm_neon_vst4lane:
> - return CombineBaseUpdate(N, DCI);
> - default:
> - break;
> - }
> - }
> - return SDValue();
> -}
> -
> -bool
> -AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
> - VT = VT.getScalarType();
> -
> - if (!VT.isSimple())
> - return false;
> -
> - switch (VT.getSimpleVT().SimpleTy) {
> - case MVT::f16:
> - case MVT::f32:
> - case MVT::f64:
> - return true;
> - case MVT::f128:
> - return false;
> - default:
> - break;
> - }
> -
> - return false;
> -}
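This hook is what lets the DAG combiner contract a separate multiply and
add into a fused multiply-add for f16/f32/f64, while refusing for f128,
which has no FMADD instruction. A minimal sketch of code that benefits
(illustrative; contraction also depends on the fp-contract settings in
effect):

  // With fast FMA, x * y + z can become a single fmadd instead of
  // fmul + fadd.
  double mac(double x, double y, double z) { return x * y + z; }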
> -
> -bool AArch64TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
> - unsigned AddrSpace,
> - bool *Fast) const {
> - const AArch64Subtarget *Subtarget = getSubtarget();
> - // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
> - bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
> -
> - switch (VT.getSimpleVT().SimpleTy) {
> - default:
> - return false;
> - // Scalar types
> - case MVT::i8: case MVT::i16:
> - case MVT::i32: case MVT::i64:
> - case MVT::f32: case MVT::f64: {
> - // Unaligned accesses can use (for example) LDRB, LDRH, LDRW
> - if (AllowsUnaligned) {
> - if (Fast)
> - *Fast = true;
> - return true;
> - }
> - return false;
> - }
> - // 64-bit vector types
> - case MVT::v8i8: case MVT::v4i16:
> - case MVT::v2i32: case MVT::v1i64:
> - case MVT::v2f32: case MVT::v1f64:
> - // 128-bit vector types
> - case MVT::v16i8: case MVT::v8i16:
> - case MVT::v4i32: case MVT::v2i64:
> - case MVT::v4f32: case MVT::v2f64: {
> - // For any little-endian target with NEON, we can support unaligned
> - // loads/stores of V registers using ld1/st1.
> - // A big-endian target may also explicitly support unaligned accesses.
> - if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
> - if (Fast)
> - *Fast = true;
> - return true;
> - }
> - return false;
> - }
> - }
> -}
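A sketch of how a client might consult this hook before choosing a wide
access (hedged: canUseWideCopy is a made-up helper, and the include
assumes the LLVM tree layout of this era):

  #include "llvm/Target/TargetLowering.h"

  // Ask the target whether an unaligned 128-bit access is both legal and
  // fast before emitting it, e.g. when lowering a memcpy.
  bool canUseWideCopy(const llvm::TargetLowering &TLI) {
    bool Fast = false;
    return TLI.allowsUnalignedMemoryAccesses(llvm::MVT::v2i64, 0, &Fast) &&
           Fast;
  }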
> -
> -// Check whether a shuffle_vector could be represented as a concat_vector.
> -bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG,
> - SDValue V0, SDValue V1,
> - const int *Mask,
> - SDValue &Res) const {
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> - if (VT.getSizeInBits() != 128)
> - return false;
> - if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
> - VT.getVectorElementType() != V1.getValueType().getVectorElementType())
> - return false;
> -
> - unsigned NumElts = VT.getVectorNumElements();
> - bool IsConcatVector = true;
> - bool splitV0 = false;
> - if (V0.getValueType().getSizeInBits() == 128)
> - splitV0 = true;
> -
> - for (int I = 0, E = NumElts / 2; I != E; I++) {
> - if (Mask[I] != I) {
> - IsConcatVector = false;
> - break;
> - }
> - }
> -
> - if (IsConcatVector) {
> - int offset = NumElts / 2;
> - for (int I = NumElts / 2, E = NumElts; I != E; I++) {
> - if (Mask[I] != I + splitV0 * offset) {
> - IsConcatVector = false;
> - break;
> - }
> - }
> - }
> -
> - if (IsConcatVector) {
> - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
> - NumElts / 2);
> - if (splitV0) {
> - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
> - DAG.getConstant(0, MVT::i64));
> - }
> - if (V1.getValueType().getSizeInBits() == 128) {
> - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
> - DAG.getConstant(0, MVT::i64));
> - }
> - Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
> - return true;
> - }
> - return false;
> -}
> -
> -// Check whether a Build Vector could be represented as a Shuffle Vector.
> -// This Shuffle Vector may not be legalized, so the lengths of its operands
> -// and of its result may not be equal.
> -bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
> - SDValue &V0, SDValue &V1,
> - int *Mask) const {
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> - unsigned NumElts = VT.getVectorNumElements();
> - unsigned V0NumElts = 0;
> -
> - // Check that all elements are extracted from at most two vectors.
> - for (unsigned i = 0; i < NumElts; ++i) {
> - SDValue Elt = Op.getOperand(i);
> - if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
> - Elt.getOperand(0).getValueType().getVectorElementType() !=
> - VT.getVectorElementType())
> - return false;
> -
> - if (!V0.getNode()) {
> - V0 = Elt.getOperand(0);
> - V0NumElts = V0.getValueType().getVectorNumElements();
> - }
> - if (Elt.getOperand(0) == V0) {
> - Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
> - continue;
> - } else if (!V1.getNode()) {
> - V1 = Elt.getOperand(0);
> - }
> - if (Elt.getOperand(0) == V1) {
> - unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
> - Mask[i] = (Lane + V0NumElts);
> - continue;
> - } else {
> - return false;
> - }
> - }
> - return true;
> -}
> -
> -/// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which return two
> -/// i64 values and take a 2 x i64 value to shift plus a shift amount.
> -SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(Op.getNumOperands() == 3 && "Not a quad-shift!");
> - EVT VT = Op.getValueType();
> - unsigned VTBits = VT.getSizeInBits();
> - SDLoc dl(Op);
> - SDValue ShOpLo = Op.getOperand(0);
> - SDValue ShOpHi = Op.getOperand(1);
> - SDValue ShAmt = Op.getOperand(2);
> - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
> -
> - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
> - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
> - DAG.getConstant(VTBits, MVT::i64), ShAmt);
> - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
> - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
> - DAG.getConstant(VTBits, MVT::i64));
> - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
> - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
> - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
> - SDValue Tmp3 = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
> -
> - SDValue A64cc;
> - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt,
> - DAG.getConstant(0, MVT::i64),
> - ISD::SETGE, A64cc,
> - DAG, dl);
> -
> - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - DAG.getConstant(0, Tmp3.getValueType()), Tmp3,
> - A64cc);
> - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - TrueVal, FalseVal, A64cc);
> -
> - SDValue Ops[2] = { Lo, Hi };
> - return DAG.getMergeValues(Ops, dl);
> -}
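As a plain C++ model of what the selected code computes (a sketch of the
SRL_PARTS semantics only, not of the DAG lowering itself): once the shift
amount reaches 64 the low result comes entirely from the high word,
otherwise it is the OR of both partial shifts.

  #include <cstdint>

  // Low 64-bit result of a 128-bit logical right shift, mirroring the
  // TrueVal/FalseVal select above. Assumes 0 <= Amt < 128; SRA_PARTS
  // differs only in how the high bits are filled.
  uint64_t srl_parts_lo(uint64_t Lo, uint64_t Hi, unsigned Amt) {
    if (Amt == 0)
      return Lo;
    if (Amt >= 64)
      return Hi >> (Amt - 64);                 // the "TrueVal" path
    return (Lo >> Amt) | (Hi << (64 - Amt));   // the "FalseVal" path
  }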
> -
> -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
> -/// i64 values and take a 2 x i64 value to shift plus a shift amount.
> -SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(Op.getNumOperands() == 3 && "Not a quad-shift!");
> - EVT VT = Op.getValueType();
> - unsigned VTBits = VT.getSizeInBits();
> - SDLoc dl(Op);
> - SDValue ShOpLo = Op.getOperand(0);
> - SDValue ShOpHi = Op.getOperand(1);
> - SDValue ShAmt = Op.getOperand(2);
> -
> - assert(Op.getOpcode() == ISD::SHL_PARTS);
> - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
> - DAG.getConstant(VTBits, MVT::i64), ShAmt);
> - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
> - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
> - DAG.getConstant(VTBits, MVT::i64));
> - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
> - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
> - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
> - SDValue Tmp4 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
> -
> - SDValue A64cc;
> - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt,
> - DAG.getConstant(0, MVT::i64),
> - ISD::SETGE, A64cc,
> - DAG, dl);
> -
> - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - DAG.getConstant(0, Tmp4.getValueType()), Tmp4,
> - A64cc);
> - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - Tmp3, FalseVal, A64cc);
> -
> - SDValue Ops[2] = { Lo, Hi };
> - return DAG.getMergeValues(Ops, dl);
> -}
> -
> -// If this is a case we can't handle, return null and let the default
> -// expansion code take care of it.
> -SDValue
> -AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
> - const AArch64Subtarget *ST) const {
> -
> - BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> -
> - APInt SplatBits, SplatUndef;
> - unsigned SplatBitSize;
> - bool HasAnyUndefs;
> -
> - bool UseNeonMov = VT.getSizeInBits() >= 64;
> -
> - // Note we favor lowering MOVI over MVNI.
> - // This has implications on the definition of patterns in TableGen to select
> - // BIC immediate instructions but not ORR immediate instructions.
> - // If this lowering order is changed, TableGen patterns for BIC immediate and
> - // ORR immediate instructions have to be updated.
> - if (UseNeonMov &&
> - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
> - if (SplatBitSize <= 64) {
> - // First attempt to use vector immediate-form MOVI
> - EVT NeonMovVT;
> - unsigned Imm = 0;
> - unsigned OpCmode = 0;
> -
> - if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
> - SplatBitSize, DAG, VT.is128BitVector(),
> - Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
> - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
> - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
> -
> - if (ImmVal.getNode() && OpCmodeVal.getNode()) {
> - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
> - ImmVal, OpCmodeVal);
> - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
> - }
> - }
> -
> - // Then attempt to use vector immediate-form MVNI
> - uint64_t NegatedImm = (~SplatBits).getZExtValue();
> - if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
> - DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
> - Imm, OpCmode)) {
> - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
> - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
> - if (ImmVal.getNode() && OpCmodeVal.getNode()) {
> - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
> - ImmVal, OpCmodeVal);
> - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
> - }
> - }
> -
> - // Attempt to use vector immediate-form FMOV
> - if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
> - (VT == MVT::v2f64 && SplatBitSize == 64)) {
> - APFloat RealVal(
> - SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
> - SplatBits);
> - uint32_t ImmVal;
> - if (A64Imms::isFPImm(RealVal, ImmVal)) {
> - SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
> - return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
> - }
> - }
> - }
> - }
> -
> - unsigned NumElts = VT.getVectorNumElements();
> - bool isOnlyLowElement = true;
> - bool usesOnlyOneValue = true;
> - bool hasDominantValue = false;
> - bool isConstant = true;
> -
> - // Map of the number of times a particular SDValue appears in the
> - // element list.
> - DenseMap<SDValue, unsigned> ValueCounts;
> - SDValue Value;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - SDValue V = Op.getOperand(i);
> - if (V.getOpcode() == ISD::UNDEF)
> - continue;
> - if (i > 0)
> - isOnlyLowElement = false;
> - if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
> - isConstant = false;
> -
> - ValueCounts.insert(std::make_pair(V, 0));
> - unsigned &Count = ValueCounts[V];
> -
> - // Is this value dominant? (takes up more than half of the lanes)
> - if (++Count > (NumElts / 2)) {
> - hasDominantValue = true;
> - Value = V;
> - }
> - }
> - if (ValueCounts.size() != 1)
> - usesOnlyOneValue = false;
> - if (!Value.getNode() && ValueCounts.size() > 0)
> - Value = ValueCounts.begin()->first;
> -
> - if (ValueCounts.size() == 0)
> - return DAG.getUNDEF(VT);
> -
> - if (isOnlyLowElement)
> - return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
> -
> - unsigned EltSize = VT.getVectorElementType().getSizeInBits();
> - if (hasDominantValue && EltSize <= 64) {
> - // Use VDUP for non-constant splats.
> - if (!isConstant) {
> - SDValue N;
> -
> - // If we are DUPing a value that comes directly from a vector, we could
> - // just use DUPLANE. We can only do this if the lane being extracted
> - // is at a constant index, as the DUP from lane instructions only have
> - // constant-index forms.
> - //
> - // If there is a TRUNCATE between EXTRACT_VECTOR_ELT and DUP, we can
> - // remove the TRUNCATE for DUPLANE by updating the source vector to the
> - // appropriate vector type and lane index.
> - //
> - // FIXME: for now v1i8, v1i16 and v1i32 are legal vector types; if they
> - // stop being legal, there will be no need to check that the type size
> - // in bits is larger than 64.
> - SDValue V = Value;
> - if (Value->getOpcode() == ISD::TRUNCATE)
> - V = Value->getOperand(0);
> - if (V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
> - isa<ConstantSDNode>(V->getOperand(1)) &&
> - V->getOperand(0).getValueType().getSizeInBits() >= 64) {
> -
> - // If the element size of the source vector is larger than the DUPLANE
> - // element size, we can do the transformation by:
> - // 1) bitcasting the source register to a smaller-element vector;
> - // 2) multiplying the lane index by SrcEltSize/ResEltSize.
> - // For example, we can lower
> - // "v8i16 vdup_lane(v4i32, 1)"
> - // to be
> - // "v8i16 vdup_lane(v8i16 bitcast(v4i32), 2)".
> - SDValue SrcVec = V->getOperand(0);
> - unsigned SrcEltSize =
> - SrcVec.getValueType().getVectorElementType().getSizeInBits();
> - unsigned ResEltSize = VT.getVectorElementType().getSizeInBits();
> - if (SrcEltSize > ResEltSize) {
> - assert((SrcEltSize % ResEltSize == 0) && "Invalid element size");
> - SDValue BitCast;
> - unsigned SrcSize = SrcVec.getValueType().getSizeInBits();
> - unsigned ResSize = VT.getSizeInBits();
> -
> - if (SrcSize > ResSize) {
> - assert((SrcSize % ResSize == 0) && "Invalid vector size");
> - EVT CastVT =
> - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
> - SrcSize / ResEltSize);
> - BitCast = DAG.getNode(ISD::BITCAST, DL, CastVT, SrcVec);
> - } else {
> - assert((SrcSize == ResSize) && "Invalid vector size of source vec");
> - BitCast = DAG.getNode(ISD::BITCAST, DL, VT, SrcVec);
> - }
> -
> - unsigned LaneIdx = V->getConstantOperandVal(1);
> - SDValue Lane =
> - DAG.getConstant((SrcEltSize / ResEltSize) * LaneIdx, MVT::i64);
> - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, BitCast, Lane);
> - } else {
> - assert((SrcEltSize == ResEltSize) &&
> - "Invalid element size of source vec");
> - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, V->getOperand(0),
> - V->getOperand(1));
> - }
> - } else
> - N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
> -
> - if (!usesOnlyOneValue) {
> - // The dominant value was splatted as 'N', but we now have to insert
> - // all differing elements.
> - for (unsigned I = 0; I < NumElts; ++I) {
> - if (Op.getOperand(I) == Value)
> - continue;
> - SmallVector<SDValue, 3> Ops;
> - Ops.push_back(N);
> - Ops.push_back(Op.getOperand(I));
> - Ops.push_back(DAG.getConstant(I, MVT::i64));
> - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Ops);
> - }
> - }
> - return N;
> - }
> - if (usesOnlyOneValue && isConstant) {
> - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
> - }
> - }
> - // If all elements are constants and the case above didn't get hit, fall back
> - // to the default expansion, which will generate a load from the constant
> - // pool.
> - if (isConstant)
> - return SDValue();
> -
> - // Try to lower this in lowering ShuffleVector way.
> - SDValue V0, V1;
> - int Mask[16];
> - if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
> - unsigned V0NumElts = V0.getValueType().getVectorNumElements();
> - if (!V1.getNode() && V0NumElts == NumElts * 2) {
> - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
> - DAG.getConstant(NumElts, MVT::i64));
> - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
> - DAG.getConstant(0, MVT::i64));
> - V0NumElts = V0.getValueType().getVectorNumElements();
> - }
> -
> - if (V1.getNode() && NumElts == V0NumElts &&
> - V0NumElts == V1.getValueType().getVectorNumElements()) {
> - SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
> - if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
> - return Shuffle;
> - else
> - return LowerVECTOR_SHUFFLE(Shuffle, DAG);
> - } else {
> - SDValue Res;
> - if (isConcatVector(Op, DAG, V0, V1, Mask, Res))
> - return Res;
> - }
> - }
> -
> - // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
> - // know the default expansion would otherwise fall back on something even
> - // worse. For a vector with one or two non-undef values, that's
> - // scalar_to_vector for the elements followed by a shuffle (provided the
> - // shuffle is valid for the target) and materialization element by element
> - // on the stack followed by a load for everything else.
> - if (!isConstant && !usesOnlyOneValue) {
> - SDValue Vec = DAG.getUNDEF(VT);
> - for (unsigned i = 0 ; i < NumElts; ++i) {
> - SDValue V = Op.getOperand(i);
> - if (V.getOpcode() == ISD::UNDEF)
> - continue;
> - SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
> - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
> - }
> - return Vec;
> - }
> - return SDValue();
> -}
> -
> -/// isREVMask - Check if a vector shuffle corresponds to a REV
> -/// instruction with the specified blocksize. (The order of the elements
> -/// within each block of the vector is reversed.)
> -static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
> - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
> - "Only possible block sizes for REV are: 16, 32, 64");
> -
> - unsigned EltSz = VT.getVectorElementType().getSizeInBits();
> - if (EltSz == 64)
> - return false;
> -
> - unsigned NumElts = VT.getVectorNumElements();
> - unsigned BlockElts = M[0] + 1;
> - // If the first shuffle index is UNDEF, be optimistic.
> - if (M[0] < 0)
> - BlockElts = BlockSize / EltSz;
> -
> - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
> - return false;
> -
> - for (unsigned i = 0; i < NumElts; ++i) {
> - if (M[i] < 0)
> - continue; // ignore UNDEF indices
> - if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
> - return false;
> - }
> -
> - return true;
> -}
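Concretely, for v8i8 with BlockSize == 32 the accepted mask is the REV32
pattern {3, 2, 1, 0, 7, 6, 5, 4}. The index formula can be checked in
isolation with a small self-contained program (illustrative):

  #include <cassert>

  int main() {
    const unsigned BlockElts = 4;               // BlockSize 32 / EltSz 8
    const int M[8] = {3, 2, 1, 0, 7, 6, 5, 4};  // REV32 on v8i8
    for (unsigned i = 0; i < 8; ++i)
      assert((unsigned)M[i] ==
             (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts));
    return 0;
  }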
> -
> -// isPermuteMask - Check whether the vector shuffle matches a UZP, ZIP or
> -// TRN instruction.
> -static unsigned isPermuteMask(ArrayRef<int> M, EVT VT, bool isV2undef) {
> - unsigned NumElts = VT.getVectorNumElements();
> - if (NumElts < 4)
> - return 0;
> -
> - bool ismatch = true;
> -
> - // Check UZP1
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i * 2;
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_UZP1;
> -
> - // Check UZP2
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i * 2 + 1;
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_UZP2;
> -
> - // Check ZIP1
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i / 2 + NumElts * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_ZIP1;
> -
> - // Check ZIP2
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = (NumElts + i) / 2 + NumElts * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_ZIP2;
> -
> - // Check TRN1
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i + (NumElts - 1) * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_TRN1;
> -
> - // Check TRN2
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = 1 + i + (NumElts - 1) * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_TRN2;
> -
> - return 0;
> -}
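For a v4i16 shuffle (NumElts == 4, isV2undef == false) the six formulas
above produce the canonical two-operand permute masks. Listing them makes
the checks easier to follow (worked out by hand from the code, so treat as
illustrative):

  // UZP1: {0, 2, 4, 6}   answer = i * 2
  // UZP2: {1, 3, 5, 7}   answer = i * 2 + 1
  // ZIP1: {0, 4, 1, 5}   answer = i / 2 + 4 * (i % 2)
  // ZIP2: {2, 6, 3, 7}   answer = (4 + i) / 2 + 4 * (i % 2)
  // TRN1: {0, 4, 2, 6}   answer = i + 3 * (i % 2)
  // TRN2: {1, 5, 3, 7}   answer = 1 + i + 3 * (i % 2)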
> -
> -SDValue
> -AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
> - SelectionDAG &DAG) const {
> - SDValue V1 = Op.getOperand(0);
> - SDValue V2 = Op.getOperand(1);
> - SDLoc dl(Op);
> - EVT VT = Op.getValueType();
> - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
> -
> - // Convert shuffles that are directly supported on NEON to target-specific
> - // DAG nodes, instead of keeping them as shuffles and matching them again
> - // during code selection. This is more efficient and avoids the possibility
> - // of inconsistencies between legalization and selection.
> - ArrayRef<int> ShuffleMask = SVN->getMask();
> -
> - unsigned EltSize = VT.getVectorElementType().getSizeInBits();
> - if (EltSize > 64)
> - return SDValue();
> -
> - if (isREVMask(ShuffleMask, VT, 64))
> - return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
> - if (isREVMask(ShuffleMask, VT, 32))
> - return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
> - if (isREVMask(ShuffleMask, VT, 16))
> - return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
> -
> - unsigned ISDNo;
> - if (V2.getOpcode() == ISD::UNDEF)
> - ISDNo = isPermuteMask(ShuffleMask, VT, true);
> - else
> - ISDNo = isPermuteMask(ShuffleMask, VT, false);
> -
> - if (ISDNo) {
> - if (V2.getOpcode() == ISD::UNDEF)
> - return DAG.getNode(ISDNo, dl, VT, V1, V1);
> - else
> - return DAG.getNode(ISDNo, dl, VT, V1, V2);
> - }
> -
> - SDValue Res;
> - if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
> - return Res;
> -
> - // If the elements of the shuffle mask are all the same constant, we can
> - // transform it into either NEON_VDUP or NEON_VDUPLANE.
> - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
> - int Lane = SVN->getSplatIndex();
> - // If this is undef splat, generate it via "just" vdup, if possible.
> - if (Lane == -1) Lane = 0;
> -
> - // Test if V1 is a SCALAR_TO_VECTOR.
> - if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
> - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
> - }
> - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
> - if (V1.getOpcode() == ISD::BUILD_VECTOR) {
> - bool IsScalarToVector = true;
> - for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
> - if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
> - i != (unsigned)Lane) {
> - IsScalarToVector = false;
> - break;
> - }
> - if (IsScalarToVector)
> - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
> - V1.getOperand(Lane));
> - }
> -
> - // Test if V1 is a EXTRACT_SUBVECTOR.
> - if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
> - int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
> - DAG.getConstant(Lane + ExtLane, MVT::i64));
> - }
> - // Test if V1 is a CONCAT_VECTORS.
> - if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
> - V1.getOperand(1).getOpcode() == ISD::UNDEF) {
> - SDValue Op0 = V1.getOperand(0);
> - assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
> - "Invalid vector lane access");
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
> - DAG.getConstant(Lane, MVT::i64));
> - }
> -
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
> - DAG.getConstant(Lane, MVT::i64));
> - }
> -
> - int Length = ShuffleMask.size();
> - int V1EltNum = V1.getValueType().getVectorNumElements();
> -
> - // If the number of V1 elements is the same as the number of shuffle mask
> - // elements and the shuffle mask values are sequential, we can transform
> - // it into NEON_VEXTRACT.
> - if (V1EltNum == Length) {
> - // Check if the shuffle mask is sequential.
> - int SkipUndef = 0;
> - while (ShuffleMask[SkipUndef] == -1) {
> - SkipUndef++;
> - }
> - int CurMask = ShuffleMask[SkipUndef];
> - if (CurMask >= SkipUndef) {
> - bool IsSequential = true;
> - for (int I = SkipUndef; I < Length; ++I) {
> - if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) {
> - IsSequential = false;
> - break;
> - }
> - CurMask++;
> - }
> - if (IsSequential) {
> - assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
> - unsigned VecSize = EltSize * V1EltNum;
> - unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef);
> - if (VecSize == 64 || VecSize == 128)
> - return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
> - DAG.getConstant(Index, MVT::i64));
> - }
> - }
> - }
> -
> - // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
> - // insert by element from V2 into V1.
> - // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is a
> - // better insert target than V1 because fewer inserts are needed, so we
> - // count the elements to be inserted for both V1 and V2 and pick the one
> - // that needs fewer.
> -
> - // Collect elements need to be inserted and their index.
> - SmallVector<int, 8> NV1Elt;
> - SmallVector<int, 8> N1Index;
> - SmallVector<int, 8> NV2Elt;
> - SmallVector<int, 8> N2Index;
> - for (int I = 0; I != Length; ++I) {
> - if (ShuffleMask[I] != I) {
> - NV1Elt.push_back(ShuffleMask[I]);
> - N1Index.push_back(I);
> - }
> - }
> - for (int I = 0; I != Length; ++I) {
> - if (ShuffleMask[I] != (I + V1EltNum)) {
> - NV2Elt.push_back(ShuffleMask[I]);
> - N2Index.push_back(I);
> - }
> - }
> -
> - // Decide which vector to insert into. If all lanes mismatch for both,
> - // neither V1 nor V2 is used as the base and we start from UNDEF.
> - SDValue InsV = V1;
> - SmallVector<int, 8> InsMasks = NV1Elt;
> - SmallVector<int, 8> InsIndex = N1Index;
> - if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
> - if (NV1Elt.size() > NV2Elt.size()) {
> - InsV = V2;
> - InsMasks = NV2Elt;
> - InsIndex = N2Index;
> - }
> - } else {
> - InsV = DAG.getNode(ISD::UNDEF, dl, VT);
> - }
> -
> - for (int I = 0, E = InsMasks.size(); I != E; ++I) {
> - SDValue ExtV = V1;
> - int Mask = InsMasks[I];
> - if (Mask >= V1EltNum) {
> - ExtV = V2;
> - Mask -= V1EltNum;
> - }
> - // Any value type smaller than i32 is illegal in AArch64, and this lowering
> - // function is called after the legalize pass, so we need to legalize
> - // the result here.
> - EVT EltVT;
> - if (VT.getVectorElementType().isFloatingPoint())
> - EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
> - else
> - EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
> -
> - if (Mask >= 0) {
> - ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
> - DAG.getConstant(Mask, MVT::i64));
> - InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
> - DAG.getConstant(InsIndex[I], MVT::i64));
> - }
> - }
> - return InsV;
> -}
> -
> -AArch64TargetLowering::ConstraintType
> -AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
> - if (Constraint.size() == 1) {
> - switch (Constraint[0]) {
> - default: break;
> - case 'w': // An FP/SIMD vector register
> - return C_RegisterClass;
> - case 'I': // Constant that can be used with an ADD instruction
> - case 'J': // Constant that can be used with a SUB instruction
> - case 'K': // Constant that can be used with a 32-bit logical instruction
> - case 'L': // Constant that can be used with a 64-bit logical instruction
> - case 'M': // Constant that can be used as a 32-bit MOV immediate
> - case 'N': // Constant that can be used as a 64-bit MOV immediate
> - case 'Y': // Floating point constant zero
> - case 'Z': // Integer constant zero
> - return C_Other;
> - case 'Q': // A memory reference with base register and no offset
> - return C_Memory;
> - case 'S': // A symbolic address
> - return C_Other;
> - }
> - }
> -
> - // FIXME: Ump, Utf, Usa, Ush
> - // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
> - // whatever they may be
> - // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
> - // Usa: An absolute symbolic address
> - // Ush: The high part (bits 32:12) of a pc-relative symbolic address
> - assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
> - && Constraint != "Ush" && "Unimplemented constraints");
> -
> - return TargetLowering::getConstraintType(Constraint);
> -}
> -
> -TargetLowering::ConstraintWeight
> -AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
> - const char *Constraint) const {
> -
> - llvm_unreachable("Constraint weight unimplemented");
> -}
> -
> -void
> -AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
> - std::string &Constraint,
> - std::vector<SDValue> &Ops,
> - SelectionDAG &DAG) const {
> - SDValue Result;
> -
> - // Only length 1 constraints are C_Other.
> - if (Constraint.size() != 1) return;
> -
> - // Only C_Other constraints get lowered like this. That means constants for
> - // us, so return early if there's no hope the constraint can be lowered.
> -
> - switch(Constraint[0]) {
> - default: break;
> - case 'I': case 'J': case 'K': case 'L':
> - case 'M': case 'N': case 'Z': {
> - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
> - if (!C)
> - return;
> -
> - uint64_t CVal = C->getZExtValue();
> - uint32_t Bits;
> -
> - switch (Constraint[0]) {
> - default:
> - // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
> - // is a peculiarly useless SUB constraint.
> - llvm_unreachable("Unimplemented C_Other constraint");
> - case 'I':
> - if (CVal <= 0xfff)
> - break;
> - return;
> - case 'K':
> - if (A64Imms::isLogicalImm(32, CVal, Bits))
> - break;
> - return;
> - case 'L':
> - if (A64Imms::isLogicalImm(64, CVal, Bits))
> - break;
> - return;
> - case 'Z':
> - if (CVal == 0)
> - break;
> - return;
> - }
> -
> - Result = DAG.getTargetConstant(CVal, Op.getValueType());
> - break;
> - }
> - case 'S': {
> - // An absolute symbolic address or label reference.
> - if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
> - Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
> - GA->getValueType(0));
> - } else if (const BlockAddressSDNode *BA
> - = dyn_cast<BlockAddressSDNode>(Op)) {
> - Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
> - BA->getValueType(0));
> - } else if (const ExternalSymbolSDNode *ES
> - = dyn_cast<ExternalSymbolSDNode>(Op)) {
> - Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
> - ES->getValueType(0));
> - } else
> - return;
> - break;
> - }
> - case 'Y':
> - if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
> - if (CFP->isExactlyValue(0.0)) {
> - Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
> - break;
> - }
> - }
> - return;
> - }
> -
> - if (Result.getNode()) {
> - Ops.push_back(Result);
> - return;
> - }
> -
> - // It's an unknown constraint for us. Let generic code have a go.
> - TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
> -}
> -
> -std::pair<unsigned, const TargetRegisterClass*>
> -AArch64TargetLowering::getRegForInlineAsmConstraint(
> - const std::string &Constraint,
> - MVT VT) const {
> - if (Constraint.size() == 1) {
> - switch (Constraint[0]) {
> - case 'r':
> - if (VT.getSizeInBits() <= 32)
> - return std::make_pair(0U, &AArch64::GPR32RegClass);
> - else if (VT == MVT::i64)
> - return std::make_pair(0U, &AArch64::GPR64RegClass);
> - break;
> - case 'w':
> - if (VT == MVT::f16)
> - return std::make_pair(0U, &AArch64::FPR16RegClass);
> - else if (VT == MVT::f32)
> - return std::make_pair(0U, &AArch64::FPR32RegClass);
> - else if (VT.getSizeInBits() == 64)
> - return std::make_pair(0U, &AArch64::FPR64RegClass);
> - else if (VT.getSizeInBits() == 128)
> - return std::make_pair(0U, &AArch64::FPR128RegClass);
> - break;
> - }
> - }
> -
> - // Use the default implementation in TargetLowering to convert the register
> - // constraint into a member of a register class.
> - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
> -}
> -
> -/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
> -/// The associated MachineMemOperands record the alignment specified
> -/// in the intrinsic calls.
> -bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
> - const CallInst &I,
> - unsigned Intrinsic) const {
> - switch (Intrinsic) {
> - case Intrinsic::arm_neon_vld1:
> - case Intrinsic::arm_neon_vld2:
> - case Intrinsic::arm_neon_vld3:
> - case Intrinsic::arm_neon_vld4:
> - case Intrinsic::aarch64_neon_vld1x2:
> - case Intrinsic::aarch64_neon_vld1x3:
> - case Intrinsic::aarch64_neon_vld1x4:
> - case Intrinsic::arm_neon_vld2lane:
> - case Intrinsic::arm_neon_vld3lane:
> - case Intrinsic::arm_neon_vld4lane: {
> - Info.opc = ISD::INTRINSIC_W_CHAIN;
> - // Conservatively set memVT to the entire set of vectors loaded.
> - uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
> - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
> - Info.ptrVal = I.getArgOperand(0);
> - Info.offset = 0;
> - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
> - Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
> - Info.vol = false; // volatile loads with NEON intrinsics not supported
> - Info.readMem = true;
> - Info.writeMem = false;
> - return true;
> - }
> - case Intrinsic::arm_neon_vst1:
> - case Intrinsic::arm_neon_vst2:
> - case Intrinsic::arm_neon_vst3:
> - case Intrinsic::arm_neon_vst4:
> - case Intrinsic::aarch64_neon_vst1x2:
> - case Intrinsic::aarch64_neon_vst1x3:
> - case Intrinsic::aarch64_neon_vst1x4:
> - case Intrinsic::arm_neon_vst2lane:
> - case Intrinsic::arm_neon_vst3lane:
> - case Intrinsic::arm_neon_vst4lane: {
> - Info.opc = ISD::INTRINSIC_VOID;
> - // Conservatively set memVT to the entire set of vectors stored.
> - unsigned NumElts = 0;
> - for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
> - Type *ArgTy = I.getArgOperand(ArgI)->getType();
> - if (!ArgTy->isVectorTy())
> - break;
> - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
> - }
> - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
> - Info.ptrVal = I.getArgOperand(0);
> - Info.offset = 0;
> - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
> - Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
> - Info.vol = false; // volatile stores with NEON intrinsics not supported
> - Info.readMem = false;
> - Info.writeMem = true;
> - return true;
> - }
> - default:
> - break;
> - }
> -
> - return false;
> -}
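A worked example of the conservative memVT above (arithmetic only,
illustrative): an arm_neon_vld3 returning three <4 x i32> vectors has an
aggregate alloc size of 48 bytes, so:

  // TypeAllocSize = 3 * 16 bytes = 48
  // NumElts       = 48 / 8       = 6
  // memVT         = v6i64, spanning every vector loaded by the intrinsic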
> -
> -// Truncations from a 64-bit GPR to a 32-bit GPR are free.
> -bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
> - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
> - return false;
> - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
> - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
> - if (NumBits1 <= NumBits2)
> - return false;
> - return true;
> -}
> -
> -bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
> - if (!VT1.isInteger() || !VT2.isInteger())
> - return false;
> - unsigned NumBits1 = VT1.getSizeInBits();
> - unsigned NumBits2 = VT2.getSizeInBits();
> - if (NumBits1 <= NumBits2)
> - return false;
> - return true;
> -}
> -
> -// All 32-bit GPR operations implicitly zero the high-half of the corresponding
> -// 64-bit GPR.
> -bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
> - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
> - return false;
> - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
> - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
> - if (NumBits1 == 32 && NumBits2 == 64)
> - return true;
> - return false;
> -}
> -
> -bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
> - if (!VT1.isInteger() || !VT2.isInteger())
> - return false;
> - unsigned NumBits1 = VT1.getSizeInBits();
> - unsigned NumBits2 = VT2.getSizeInBits();
> - if (NumBits1 == 32 && NumBits2 == 64)
> - return true;
> - return false;
> -}
> -
> -bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
> - EVT VT1 = Val.getValueType();
> - if (isZExtFree(VT1, VT2)) {
> - return true;
> - }
> -
> - if (Val.getOpcode() != ISD::LOAD)
> - return false;
> -
> - // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
> - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
> - VT2.isInteger() && VT1.getSizeInBits() <= 32);
> -}
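The architectural reason zext is free: any write to a W register zeroes
bits 63:32 of the corresponding X register, and the narrow load forms
zero-extend implicitly. A hedged sketch of a pattern that needs no extra
instruction (function name made up):

  #include <cstdint>

  // Compiles to a bare 32-bit load; the i32 -> i64 zext costs nothing.
  uint64_t widen_load(const uint32_t *p) { return *p; }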
> -
> -/// isLegalAddressingMode - Return true if the addressing mode represented
> -/// by AM is legal for this target, for a load/store of the specified type.
> -bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
> - Type *Ty) const {
> - // AArch64 has five basic addressing modes:
> - // reg
> - // reg + 9-bit signed offset
> - // reg + SIZE_IN_BYTES * 12-bit unsigned offset
> - // reg1 + reg2
> - // reg + SIZE_IN_BYTES * reg
> -
> - // No global is ever allowed as a base.
> - if (AM.BaseGV)
> - return false;
> -
> - // No reg+reg+imm addressing.
> - if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
> - return false;
> -
> - // check reg + imm case:
> - // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
> - uint64_t NumBytes = 0;
> - if (Ty->isSized()) {
> - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
> - NumBytes = NumBits / 8;
> - if (!isPowerOf2_64(NumBits))
> - NumBytes = 0;
> - }
> -
> - if (!AM.Scale) {
> - int64_t Offset = AM.BaseOffs;
> -
> - // 9-bit signed offset
> - if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
> - return true;
> -
> - // 12-bit unsigned offset
> - unsigned shift = Log2_64(NumBytes);
> - if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
> - // Must be a multiple of NumBytes (NumBytes is a power of 2)
> - (Offset >> shift) << shift == Offset)
> - return true;
> - return false;
> - }
> - if (!AM.Scale || AM.Scale == 1 ||
> - (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes))
> - return true;
> - return false;
> -}
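The reg + imm half of this predicate is self-contained enough to restate
on its own. A standalone sketch that mirrors the check above (helper name
made up; NumBytes is the power-of-two access size, or 0 when unknown):

  #include <cstdint>

  bool isLegalRegImmOffset(int64_t Offset, uint64_t NumBytes) {
    // 9-bit signed offset.
    if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
      return true;
    // Scaled 12-bit unsigned offset: a positive multiple of the access
    // size, at most (2^12 - 1) * NumBytes.
    return NumBytes && Offset > 0 &&
           Offset % (int64_t)NumBytes == 0 &&
           Offset / (int64_t)NumBytes <= (1LL << 12) - 1;
  }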
> -
> -int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
> - Type *Ty) const {
> - // Scaling factors are not free at all.
> - // Operands | Rt Latency
> - // -------------------------------------------
> - // Rt, [Xn, Xm] | 4
> - // -------------------------------------------
> - // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
> - // Rt, [Xn, Wm, <extend> #imm] |
> - if (isLegalAddressingMode(AM, Ty))
> - // Scale represents reg2 * scale; charge a cost of 1 if it is
> - // neither 0 nor 1.
> - return AM.Scale != 0 && AM.Scale != 1;
> - return -1;
> -}
> -
> -/// getMaximalGlobalOffset - Returns the maximal possible offset which can
> -/// be used for loads / stores from the global.
> -unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
> - return 4095;
> -}
> -
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (removed)
> @@ -1,410 +0,0 @@
> -//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines the interfaces that AArch64 uses to lower LLVM code into a
> -// selection DAG.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
> -#define LLVM_TARGET_AARCH64_ISELLOWERING_H
> -
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/CodeGen/CallingConvLower.h"
> -#include "llvm/CodeGen/SelectionDAG.h"
> -#include "llvm/IR/Intrinsics.h"
> -#include "llvm/Target/TargetLowering.h"
> -
> -namespace llvm {
> -namespace AArch64ISD {
> - enum NodeType {
> - // Start the numbering from where ISD NodeType finishes.
> - FIRST_NUMBER = ISD::BUILTIN_OP_END,
> -
> - // This is a conditional branch which also notes the flag needed
> - // (eq/sgt/...). A64 puts this information on the branches rather than
> - // compares as LLVM does.
> - BR_CC,
> -
> - // A node to be selected to an actual call operation: either BL or BLR in
> - // the absence of tail calls.
> - Call,
> -
> - // Indicates a floating-point immediate which fits into the format required
> - // by the FMOV instructions. First (and only) operand is the 8-bit encoded
> - // value of that immediate.
> - FPMOV,
> -
> - // Corresponds directly to an EXTR instruction. Operands are an LHS, an
> - // RHS, and an LSB.
> - EXTR,
> -
> - // Wraps a load from the GOT, which should always be performed with a 64-bit
> - // load instruction. This prevents the DAG combiner folding a truncate to
> - // form a smaller memory access.
> - GOTLoad,
> -
> - // Performs a bitfield insert. Arguments are: the value being inserted into;
> - // the value being inserted; least significant bit changed; width of the
> - // field.
> - BFI,
> -
> - // Simply a convenient node inserted during ISelLowering to represent
> - // procedure return. Will almost certainly be selected to "RET".
> - Ret,
> -
> - /// Extracts a field of contiguous bits from the source and sign extends
> - /// them into a single register. Arguments are: source; immr; imms. Note
> - /// these are pre-encoded since DAG matching can't cope with combining LSB
> - /// and Width into these values itself.
> - SBFX,
> -
> - /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
> - /// main difference is that it only has the values and an A64 condition,
> - /// which will be produced by a setcc instruction.
> - SELECT_CC,
> -
> - /// This serves most of the functions of the LLVM SETCC instruction, for two
> - /// purposes. First, it prevents optimisations from fiddling with the
> - /// compare after we've moved the CondCode information onto the SELECT_CC or
> - /// BR_CC instructions. Second, it gives a legal instruction for the actual
> - /// comparison.
> - ///
> - /// It keeps a record of the condition flags asked for because certain
> - /// instructions are only valid for a subset of condition codes.
> - SETCC,
> -
> - // Designates a node which is a tail call: both a call and a return
> - // instruction as far as selection is concerned. It should be selected to an
> - // unconditional branch. Has the usual plethora of call operands, but: 1st
> - // is callee, 2nd is stack adjustment required immediately before branch.
> - TC_RETURN,
> -
> - // Designates a call used to support the TLS descriptor ABI. The call itself
> - // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
> - // var") must be attached somehow during code generation. It takes two
> - // operands: the callee and the symbol to be relocated against.
> - TLSDESCCALL,
> -
> - // Leaf node which will be lowered to an appropriate MRS to obtain the
> - // thread pointer: TPIDR_EL0.
> - THREAD_POINTER,
> -
> - /// Extracts a field of contiguous bits from the source and zero extends
> - /// them into a single register. Arguments are: source; immr; imms. Note
> - /// these are pre-encoded since DAG matching can't cope with combining LSB
> - /// and Width into these values itself.
> - UBFX,
> -
> - // Wraps an address which the ISelLowering phase has decided should be
> - // created using the large memory model style: i.e. a sequence of four
> - // movz/movk instructions.
> - WrapperLarge,
> -
> - // Wraps an address which the ISelLowering phase has decided should be
> - // created using the small memory model style: i.e. adrp/add or
> - // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
> - // get selected.
> - WrapperSmall,
> -
> - // Vector move immediate
> - NEON_MOVIMM,
> -
> - // Vector Move Inverted Immediate
> - NEON_MVNIMM,
> -
> - // Vector FP move immediate
> - NEON_FMOVIMM,
> -
> - // Vector permute
> - NEON_UZP1,
> - NEON_UZP2,
> - NEON_ZIP1,
> - NEON_ZIP2,
> - NEON_TRN1,
> - NEON_TRN2,
> -
> - // Vector Element reverse
> - NEON_REV64,
> - NEON_REV32,
> - NEON_REV16,
> -
> - // Vector compare
> - NEON_CMP,
> -
> - // Vector compare zero
> - NEON_CMPZ,
> -
> - // Vector compare bitwise test
> - NEON_TST,
> -
> - // Vector saturating shift
> - NEON_QSHLs,
> - NEON_QSHLu,
> -
> - // Vector dup
> - NEON_VDUP,
> -
> - // Vector dup by lane
> - NEON_VDUPLANE,
> -
> - // Vector extract
> - NEON_VEXTRACT,
> -
> - // NEON duplicate lane loads
> - NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
> - NEON_LD3DUP,
> - NEON_LD4DUP,
> -
> - // NEON loads with post-increment base updates:
> - NEON_LD1_UPD,
> - NEON_LD2_UPD,
> - NEON_LD3_UPD,
> - NEON_LD4_UPD,
> - NEON_LD1x2_UPD,
> - NEON_LD1x3_UPD,
> - NEON_LD1x4_UPD,
> -
> - // NEON stores with post-increment base updates:
> - NEON_ST1_UPD,
> - NEON_ST2_UPD,
> - NEON_ST3_UPD,
> - NEON_ST4_UPD,
> - NEON_ST1x2_UPD,
> - NEON_ST1x3_UPD,
> - NEON_ST1x4_UPD,
> -
> - // NEON duplicate lane loads with post-increment base updates:
> - NEON_LD2DUP_UPD,
> - NEON_LD3DUP_UPD,
> - NEON_LD4DUP_UPD,
> -
> - // NEON lane loads with post-increment base updates:
> - NEON_LD2LN_UPD,
> - NEON_LD3LN_UPD,
> - NEON_LD4LN_UPD,
> -
> - // NEON lane store with post-increment base updates:
> - NEON_ST2LN_UPD,
> - NEON_ST3LN_UPD,
> - NEON_ST4LN_UPD
> - };
> -}
> -
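
A note on the pre-encoded immr/imms mentioned for SBFX/UBFX above: they
follow the usual SBFM/UBFM field mapping, so the conversion the matcher
cannot do for itself is just the one below. A minimal sketch in C++; the
helper and its names are mine, not code from this tree:

    #include <cassert>

    struct BitfieldImms { unsigned ImmR, ImmS; };

    // Map an (lsb, width) field extract onto the immr/imms operands
    // that UBFX/SBFX, as aliases of UBFM/SBFM, expect.
    BitfieldImms encodeBFX(unsigned LSB, unsigned Width, unsigned RegSize) {
      assert(Width >= 1 && LSB + Width <= RegSize);
      return { LSB, LSB + Width - 1 }; // immr = lsb, imms = msb of field
    }

E.g. a 64-bit "ubfx x0, x1, #8, #4" carries immr=8, imms=11.
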
> -
> -class AArch64Subtarget;
> -class AArch64TargetMachine;
> -
> -class AArch64TargetLowering : public TargetLowering {
> -public:
> - explicit AArch64TargetLowering(AArch64TargetMachine &TM);
> -
> - const char *getTargetNodeName(unsigned Opcode) const override;
> -
> - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
> -
> - SDValue LowerFormalArguments(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const override;
> -
> - SDValue LowerReturn(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - SDLoc dl, SelectionDAG &DAG) const override;
> -
> - unsigned getByValTypeAlignment(Type *Ty) const override;
> -
> - SDValue LowerCall(CallLoweringInfo &CLI,
> - SmallVectorImpl<SDValue> &InVals) const override;
> -
> - SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
> - CallingConv::ID CallConv, bool IsVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const;
> -
> - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
> -
> - bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1,
> - const int *Mask, SDValue &Res) const;
> -
> - bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
> - SDValue &V1, int *Mask) const;
> -
> - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
> - const AArch64Subtarget *ST) const;
> -
> - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
> -
> - void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
> - SDValue &Chain) const;
> -
> - /// IsEligibleForTailCallOptimization - Check whether the call is eligible
> - /// for tail call optimization. Targets which want to do tail call
> - /// optimization should implement this function.
> - bool IsEligibleForTailCallOptimization(SDValue Callee,
> - CallingConv::ID CalleeCC,
> - bool IsVarArg,
> - bool IsCalleeStructRet,
> - bool IsCallerStructRet,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SelectionDAG& DAG) const;
> -
> - /// Finds the incoming stack arguments which overlap the given fixed stack
> - /// object and incorporates their load into the current chain. This prevents
> - /// an upcoming store from clobbering the stack argument before it's used.
> - SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
> - MachineFrameInfo *MFI, int ClobberedFI) const;
> -
> - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
> -
> - bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
> -
> - bool IsTailCallConvention(CallingConv::ID CallCC) const;
> -
> - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
> -
> - bool isLegalICmpImmediate(int64_t Val) const override;
> -
> - /// \brief Return true if the addressing mode represented by AM is legal for
> - /// this target, for a load/store of the specified type.
> - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
> -
> - /// \brief Return the cost of the scaling factor used in the addressing
> - /// mode represented by AM for this target, for a load/store
> - /// of the specified type.
> - /// If the AM is supported, the return value must be >= 0.
> - /// If the AM is not supported, it returns a negative value.
> - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
> -
> - bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
> - bool isTruncateFree(EVT VT1, EVT VT2) const override;
> -
> - bool isZExtFree(Type *Ty1, Type *Ty2) const override;
> - bool isZExtFree(EVT VT1, EVT VT2) const override;
> - bool isZExtFree(SDValue Val, EVT VT2) const override;
> -
> - SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
> - SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const;
> -
> - MachineBasicBlock *
> - EmitInstrWithCustomInserter(MachineInstr *MI,
> - MachineBasicBlock *MBB) const override;
> -
> - MachineBasicBlock *
> - emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
> - unsigned Size, unsigned Opcode) const;
> -
> - MachineBasicBlock *
> - emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned Size, unsigned CmpOp,
> - A64CC::CondCodes Cond) const;
> - MachineBasicBlock *
> - emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned Size) const;
> -
> - MachineBasicBlock *
> - EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
> -
> - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
> - RTLIB::Libcall Call) const;
> - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
> - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
> - SelectionDAG &DAG) const;
> - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
> - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
> -
> - unsigned getRegisterByName(const char* RegName, EVT VT) const override;
> -
> - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
> - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
> - /// expanded to FMAs when this method returns true, otherwise fmuladd is
> - /// expanded to fmul + fadd.
> - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
> -
> - /// allowsUnalignedMemoryAccesses - Returns true if the target allows
> - /// unaligned memory accesses of the specified type. Returns whether it
> - /// is "fast" by reference in the second argument.
> - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
> - bool *Fast) const override;
> -
> - ConstraintType
> - getConstraintType(const std::string &Constraint) const override;
> -
> - ConstraintWeight
> - getSingleConstraintMatchWeight(AsmOperandInfo &Info,
> - const char *Constraint) const override;
> - void LowerAsmOperandForConstraint(SDValue Op,
> - std::string &Constraint,
> - std::vector<SDValue> &Ops,
> - SelectionDAG &DAG) const override;
> -
> - std::pair<unsigned, const TargetRegisterClass*>
> - getRegForInlineAsmConstraint(const std::string &Constraint,
> - MVT VT) const override;
> -
> - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
> - unsigned Intrinsic) const override;
> -
> - /// getMaximalGlobalOffset - Returns the maximal possible offset which can
> - /// be used for loads / stores from the global.
> - unsigned getMaximalGlobalOffset() const override;
> -
> -protected:
> - std::pair<const TargetRegisterClass*, uint8_t>
> - findRepresentativeClass(MVT VT) const override;
> -
> -private:
> - const InstrItineraryData *Itins;
> -
> - const AArch64Subtarget *getSubtarget() const {
> - return &getTargetMachine().getSubtarget<AArch64Subtarget>();
> - }
> -};
> -enum NeonModImmType {
> - Neon_Mov_Imm,
> - Neon_Mvn_Imm
> -};
> -
> -extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
> - bool &usesOnlyOneValue, bool &hasDominantValue,
> - bool &isConstant, bool &isUNDEF);
> -} // namespace llvm
> -
> -#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td (removed)
> @@ -1,1487 +0,0 @@
> -//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -// This file describes AArch64 instruction formats, down to the level of the
> -// instruction's overall class.
> -//===----------------------------------------------------------------------===//
> -
> -
> -//===----------------------------------------------------------------------===//
> -// A64 Instruction Format Definitions.
> -//===----------------------------------------------------------------------===//
> -
> -// A64 is currently the only instruction set supported by the AArch64
> -// architecture.
> -class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : Instruction {
> - // All A64 instructions are 32-bit. This field will be filled in
> - // gradually going down the hierarchy.
> - field bits<32> Inst;
> -
> - field bits<32> Unpredictable = 0;
> - // SoftFail is the generic name for this field, but we alias it so
> - // as to make it more obvious what it means in ARM-land.
> - field bits<32> SoftFail = Unpredictable;
> -
> - // LLVM-level model of the AArch64/A64 distinction.
> - let Namespace = "AArch64";
> - let DecoderNamespace = "A64";
> - let Size = 4;
> -
> - // Set the templated fields
> - let OutOperandList = outs;
> - let InOperandList = ins;
> - let AsmString = asmstr;
> - let Pattern = patterns;
> - let Itinerary = itin;
> -}
> -
> -class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
> - let Namespace = "AArch64";
> -
> - let OutOperandList = outs;
> - let InOperandList = ins;
> - let Pattern = patterns;
> - let isCodeGenOnly = 1;
> - let isPseudo = 1;
> -}
> -
> -// Represents a pseudo-instruction that, for whatever reason, stands for a
> -// single A64 instruction; the eventual result will be a 32-bit real
> -// instruction.
> -class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
> - : PseudoInst<outs, ins, patterns> {
> - let Size = 4;
> -}
> -
> -// As above, this will be a single A64 instruction, but we can actually give the
> -// expansion in TableGen.
> -class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
> - : A64PseudoInst<outs, ins, patterns>,
> - PseudoInstExpansion<Result>;
> -
> -
> -// First, some common cross-hierarchy register formats.
> -
> -class A64InstRd<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rd;
> -
> - let Inst{4-0} = Rd;
> -}
> -
> -class A64InstRt<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt;
> -
> - let Inst{4-0} = Rt;
> -}
> -
> -
> -class A64InstRdn<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - // Inherit Rd
> - bits<5> Rn;
> -
> - let Inst{9-5} = Rn;
> -}
> -
> -class A64InstRtn<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - // Inherit Rt
> - bits<5> Rn;
> -
> - let Inst{9-5} = Rn;
> -}
> -
> -// Instructions taking Rt,Rt2,Rn
> -class A64InstRtt2n<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt2;
> -
> - let Inst{14-10} = Rt2;
> -}
> -
> -class A64InstRdnm<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rm;
> -
> - let Inst{20-16} = Rm;
> -}
> -
> -class A64InstRtnm<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rm;
> -
> - let Inst{20-16} = Rm;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// Actual A64 Instruction Formats
> -//
> -
> -// Format for Add-subtract (extended register) instructions.
> -class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<3> Imm3;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-24} = 0b01011;
> - let Inst{23-22} = opt;
> - let Inst{21} = 0b1;
> - // Rm inherited in 20-16
> - let Inst{15-13} = option;
> - let Inst{12-10} = Imm3;
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
> -
> -// Format for Add-subtract (immediate) instructions.
> -class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<12> Imm12;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-24} = 0b10001;
> - let Inst{23-22} = shift;
> - let Inst{21-10} = Imm12;
> -}
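
For context on the shift field above: an add/sub immediate is legal
exactly when it is a 12-bit value, optionally shifted left by 12. A quick
sketch of that test, my own illustration rather than code from the
backend:

    #include <cstdint>

    // Legal iff the value fits in imm12 or in imm12 << 12.
    bool isLegalAddSubImm(uint64_t V) {
      return (V & ~0xfffULL) == 0 || (V & ~0xfff000ULL) == 0;
    }

Anything else has to be materialised into a register first.
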
> -
> -// Format for Add-subtract (shifted register) instructions.
> -class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<6> Imm6;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-24} = 0b01011;
> - let Inst{23-22} = shift;
> - let Inst{21} = 0b0;
> - // Rm inherited in 20-16
> - let Inst{15-10} = Imm6;
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
> -
> -// Format for Add-subtract (with carry) instructions.
> -class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-21} = 0b11010000;
> - // Rm inherited in 20-16
> - let Inst{15-10} = opcode2;
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
> -
> -
> -// Format for Bitfield instructions
> -class A64I_bitfield<bit sf, bits<2> opc, bit n,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<6> ImmR;
> - bits<6> ImmS;
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-23} = 0b100110;
> - let Inst{22} = n;
> - let Inst{21-16} = ImmR;
> - let Inst{15-10} = ImmS;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for compare and branch (immediate) instructions.
> -class A64I_cmpbr<bit sf, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - bits<19> Label;
> -
> - let Inst{31} = sf;
> - let Inst{30-25} = 0b011010;
> - let Inst{24} = op;
> - let Inst{23-5} = Label;
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for conditional branch (immediate) instructions.
> -class A64I_condbr<bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<19> Label;
> - bits<4> Cond;
> -
> - let Inst{31-25} = 0b0101010;
> - let Inst{24} = o1;
> - let Inst{23-5} = Label;
> - let Inst{4} = o0;
> - let Inst{3-0} = Cond;
> -}
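
Both classes above carry their 19-bit label in Inst{23-5}, scaled by the
4-byte instruction size. A sketch of how a disassembler would recover the
byte offset; my reconstruction, not code from this tree:

    #include <cstdint>

    int64_t decodeBranch19(uint32_t Inst) {
      uint64_t Raw = (Inst >> 5) & 0x7ffff;     // Label, Inst{23-5}
      return ((int64_t)(Raw << 45) >> 45) * 4;  // sign-extend, words -> bytes
    }

The same recipe applies to CBZ/CBNZ and B.cond alike.
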
> -
> -// Format for conditional compare (immediate) instructions.
> -class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> UImm5;
> - bits<4> NZCVImm;
> - bits<4> Cond;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = s;
> - let Inst{28-21} = 0b11010010;
> - let Inst{20-16} = UImm5;
> - let Inst{15-12} = Cond;
> - let Inst{11} = 0b1;
> - let Inst{10} = o2;
> - let Inst{9-5} = Rn;
> - let Inst{4} = o3;
> - let Inst{3-0} = NZCVImm;
> -}
> -
> -// Format for conditional compare (register) instructions.
> -class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> Rm;
> - bits<4> NZCVImm;
> - bits<4> Cond;
> -
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = s;
> - let Inst{28-21} = 0b11010010;
> - let Inst{20-16} = Rm;
> - let Inst{15-12} = Cond;
> - let Inst{11} = 0b0;
> - let Inst{10} = o2;
> - let Inst{9-5} = Rn;
> - let Inst{4} = o3;
> - let Inst{3-0} = NZCVImm;
> -}
> -
> -// Format for conditional select instructions.
> -class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<4> Cond;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = s;
> - let Inst{28-21} = 0b11010100;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = Cond;
> - let Inst{11-10} = op2;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for data processing (1 source) instructions
> -class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
> - string asmstr, dag outs, dag ins,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = 0b1;
> - let Inst{29} = S;
> - let Inst{28-21} = 0b11010110;
> - let Inst{20-16} = opcode2;
> - let Inst{15-10} = opcode;
> -}
> -
> -// Format for data processing (2 source) instructions
> -class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
> - string asmstr, dag outs, dag ins,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = 0b0;
> - let Inst{29} = S;
> - let Inst{28-21} = 0b11010110;
> - let Inst{15-10} = opcode;
> -}
> -
> -// Format for data-processing (3 source) instructions
> -
> -class A64I_dp3<bit sf, bits<6> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30-29} = opcode{5-4};
> - let Inst{28-24} = 0b11011;
> - let Inst{23-21} = opcode{3-1};
> - // Inherits Rm in 20-16
> - let Inst{15} = opcode{0};
> - // {14-10} mostly Ra, but unspecified for SMULH/UMULH
> - // Inherits Rn in 9-5
> - // Inherits Rd in 4-0
> -}
> -
> -// Format for exception generation instructions
> -class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<16> UImm16;
> -
> - let Inst{31-24} = 0b11010100;
> - let Inst{23-21} = opc;
> - let Inst{20-5} = UImm16;
> - let Inst{4-2} = op2;
> - let Inst{1-0} = ll;
> -}
> -
> -// Format for extract (immediate) instructions
> -class A64I_extract<bit sf, bits<3> op, bit n,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<6> LSB;
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = op{2-1};
> - let Inst{28-23} = 0b100111;
> - let Inst{22} = n;
> - let Inst{21} = op{0};
> - // Inherits Rm in bits 20-16
> - let Inst{15-10} = LSB;
> - // Inherits Rn in 9-5
> - // Inherits Rd in 4-0
> -}
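
The EXTR semantics noted earlier, "an LHS, an RHS and an LSB", amount to
pulling 64 contiguous bits out of the concatenation LHS:RHS. A sketch of
the 64-bit case, mine rather than the backend's:

    #include <cstdint>

    // Result is bits [LSB+63 : LSB] of the 128-bit value LHS:RHS.
    uint64_t extr64(uint64_t LHS, uint64_t RHS, unsigned LSB) {
      if (LSB == 0)  // avoid the undefined shift by 64 below
        return RHS;
      return (RHS >> LSB) | (LHS << (64 - LSB));
    }

extr64(x, x, r) is also how "ror x0, x1, #r" comes out, since ROR is an
alias of EXTR with both sources equal.
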
> -
> -let Predicates = [HasFPARMv8] in {
> -
> -// Format for floating-point compare instructions.
> -class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> Rm;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-16} = Rm;
> - let Inst{15-14} = op;
> - let Inst{13-10} = 0b1000;
> - let Inst{9-5} = Rn;
> - let Inst{4-0} = opcode2;
> -}
> -
> -// Format for floating-point conditional compare instructions.
> -class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> Rm;
> - bits<4> NZCVImm;
> - bits<4> Cond;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-16} = Rm;
> - let Inst{15-12} = Cond;
> - let Inst{11-10} = 0b01;
> - let Inst{9-5} = Rn;
> - let Inst{4} = op;
> - let Inst{3-0} = NZCVImm;
> -}
> -
> -// Format for floating-point conditional select instructions.
> -class A64I_fpcondsel<bit m, bit s, bits<2> type,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<4> Cond;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = Cond;
> - let Inst{11-10} = 0b11;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -
> -// Format for floating-point data-processing (1 source) instructions.
> -class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-15} = opcode;
> - let Inst{14-10} = 0b10000;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point data-processing (2 sources) instructions.
> -class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point data-processing (3 sources) instructions.
> -class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<5> Ra;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11111;
> - let Inst{23-22} = type;
> - let Inst{21} = o1;
> - // Inherit Rm in 20-16
> - let Inst{15} = o0;
> - let Inst{14-10} = Ra;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point <-> fixed-point conversion instructions.
> -class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<6> Scale;
> -
> - let Inst{31} = sf;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b0;
> - let Inst{20-19} = mode;
> - let Inst{18-16} = opcode;
> - let Inst{15-10} = Scale;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point <-> integer conversion instructions.
> -class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-19} = rmode;
> - let Inst{18-16} = opcode;
> - let Inst{15-10} = 0b000000;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -
> -// Format for floating-point immediate instructions.
> -class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - bits<8> Imm8;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-13} = Imm8;
> - let Inst{12-10} = 0b100;
> - let Inst{9-5} = imm5;
> - // Inherit Rd in 4-0
> -}
> -
> -}
> -
> -// Format for load-register (literal) instructions.
> -class A64I_LDRlit<bits<2> opc, bit v,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - bits<19> Imm19;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b011;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-5} = Imm19;
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store exclusive instructions.
> -class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-30} = size;
> - let Inst{29-24} = 0b001000;
> - let Inst{23} = o2;
> - let Inst{22} = L;
> - let Inst{21} = o1;
> - let Inst{15} = o0;
> -}
> -
> -class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>:
> - A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt2;
> - let Inst{14-10} = Rt2;
> -}
> -
> -class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>:
> - A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
> - bits<5> Rs;
> - let Inst{20-16} = Rs;
> -}
> -
> -class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>:
> - A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt2;
> - let Inst{14-10} = Rt2;
> -}
> -
> -// Format for load-store register (immediate post-indexed) instructions
> -class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b01;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store register (immediate pre-indexed) instructions
> -class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b11;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store register (unprivileged) instructions
> -class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store (unscaled immediate) instructions.
> -class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -
> -// Format for load-store (unsigned immediate) instructions.
> -class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<12> UImm12;
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b01;
> - let Inst{23-22} = opc;
> - let Inst{21-10} = UImm12;
> -}
> -
> -// Format for load-store register (register offset) instructions.
> -class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rm;
> -
> - // Complex operand selection needed for these instructions, so they
> - // need an "addr" field for encoding/decoding to be generated.
> - bits<3> Ext;
> - // OptionHi = Ext{2-1}
> - // S = Ext{0}
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b1;
> - let Inst{20-16} = Rm;
> - let Inst{15-14} = Ext{2-1};
> - let Inst{13} = optionlo;
> - let Inst{12} = Ext{0};
> - let Inst{11-10} = 0b10;
> - // Inherits Rn in 9-5
> - // Inherits Rt in 4-0
> -
> - let AddedComplexity = 50;
> -}
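
To make the Ext packing above concrete: the architectural extend-type
"option" field is Ext{2-1} with the per-instruction optionlo bit as its
low bit, and the shift-enable S bit is Ext{0}. A small sketch with names
of my own choosing:

    struct RegOffFields { unsigned Option; bool S; };

    RegOffFields unpackExt(unsigned Ext /*3 bits*/, bool OptionLo) {
      RegOffFields F;
      F.Option = (((Ext >> 1) & 0x3) << 1) | (OptionLo ? 1u : 0u); // Inst{15-13}
      F.S = (Ext & 1) != 0;                                        // Inst{12}
      return F;
    }

The complex-pattern "addr" operand mentioned in the comment is what
supplies Ext during selection.
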
> -
> -// Format for Load-store register pair (offset) instructions
> -class A64I_LSPoffset<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b010;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Load-store register pair (post-indexed) instructions
> -class A64I_LSPpostind<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b001;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Load-store register pair (pre-indexed) instructions
> -class A64I_LSPpreind<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b011;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Load-store non-temporal register pair (offset) instructions
> -class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b000;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Logical (immediate) instructions
> -class A64I_logicalimm<bit sf, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bit N;
> - bits<6> ImmR;
> - bits<6> ImmS;
> -
> - // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
> - // selection), so we'll combine them into a single field here.
> - bits<13> Imm;
> - // N = Imm{12};
> - // ImmR = Imm{11-6};
> - // ImmS = Imm{5-0};
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-23} = 0b100100;
> - let Inst{22} = Imm{12};
> - let Inst{21-16} = Imm{11-6};
> - let Inst{15-10} = Imm{5-0};
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
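
The N/ImmR/ImmS packing above is easy to get backwards, so spelled out as
code; this is my own restatement of the three commented equations:

    struct LogicalImm { bool N; unsigned ImmR, ImmS; };

    LogicalImm splitLogicalImm(unsigned Imm /*13 bits*/) {
      LogicalImm L;
      L.N    = ((Imm >> 12) & 1) != 0; // Inst{22}
      L.ImmR = (Imm >> 6) & 0x3f;      // Inst{21-16}
      L.ImmS = Imm & 0x3f;             // Inst{15-10}
      return L;
    }

Nothing here validates that the 13 bits name a legal bitmask immediate;
that check lives elsewhere.
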
> -
> -// Format for Logical (shifted register) instructions
> -class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<6> Imm6;
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-24} = 0b01010;
> - let Inst{23-22} = shift;
> - let Inst{21} = N;
> - // Rm inherited
> - let Inst{15-10} = Imm6;
> - // Rn inherited
> - // Rd inherited
> -}
> -
> -// Format for Move wide (immediate)
> -class A64I_movw<bit sf, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - bits<16> UImm16;
> - bits<2> Shift; // Called "hw" officially
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-23} = 0b100101;
> - let Inst{22-21} = Shift;
> - let Inst{20-5} = UImm16;
> - // Inherits Rd in 4-0
> -}
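
This is the format behind the WrapperLarge lowering mentioned back in the
ISelLowering header: one MOVZ plus up to three MOVKs, each dropping a
16-bit chunk at hw*16, hw being the Shift field here. An illustrative
sketch of the full-width sequence, not code from the tree:

    #include <cstdint>
    #include <cstdio>

    void printMovSequence(uint64_t Value) {
      for (unsigned HW = 0; HW != 4; ++HW) {
        unsigned Chunk = unsigned(uint16_t(Value >> (HW * 16)));
        std::printf("%s x0, #%u, lsl #%u\n",
                    HW == 0 ? "movz" : "movk", Chunk, HW * 16);
      }
    }

A real materialiser would skip all-zero chunks after the first; the fixed
four-instruction form is what the large-model relocations expect.
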
> -
> -// Format for PC-relative addressing instructions, ADR and ADRP.
> -class A64I_PCADR<bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - bits<21> Label;
> -
> - let Inst{31} = op;
> - let Inst{30-29} = Label{1-0};
> - let Inst{28-24} = 0b10000;
> - let Inst{23-5} = Label{20-2};
> -}
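
The odd-looking Label split, Label{1-0} up in bits 30-29 and Label{20-2}
in bits 23-5, reassembles to a 21-bit signed offset. A decoding sketch of
my own:

    #include <cstdint>

    int64_t decodeADRLabel(uint32_t Inst) {
      uint64_t ImmLo = (Inst >> 29) & 0x3;     // Label{1-0}
      uint64_t ImmHi = (Inst >> 5) & 0x7ffff;  // Label{20-2}
      uint64_t Raw = (ImmHi << 2) | ImmLo;
      return (int64_t)(Raw << 43) >> 43;       // sign-extend 21 bits
    }

For ADR the result is a byte offset from the PC; for ADRP it is scaled by
4096 and added to the PC with its low 12 bits cleared, which is what makes
the adrp/add pairs of the small memory model work.
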
> -
> -// Format for system instructions
> -class A64I_system<bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<2> Op0;
> - bits<3> Op1;
> - bits<4> CRn;
> - bits<4> CRm;
> - bits<3> Op2;
> - bits<5> Rt;
> -
> - let Inst{31-22} = 0b1101010100;
> - let Inst{21} = l;
> - let Inst{20-19} = Op0;
> - let Inst{18-16} = Op1;
> - let Inst{15-12} = CRn;
> - let Inst{11-8} = CRm;
> - let Inst{7-5} = Op2;
> - let Inst{4-0} = Rt;
> -
> - // These instructions can do horrible things.
> - let hasSideEffects = 1;
> -}
> -
> -// Format for unconditional branch (immediate) instructions
> -class A64I_Bimm<bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - // Doubly special in not sharing any register fields with other
> - // instructions; the only operand is the 26-bit branch label itself.
> - bits<26> Label;
> -
> - let Inst{31} = op;
> - let Inst{30-26} = 0b00101;
> - let Inst{25-0} = Label;
> -}
> -
> -// Format for Test & branch (immediate) instructions
> -class A64I_TBimm<bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - // Shares only Rt with the rest of the hierarchy; the tested bit number
> - // and the branch label get dedicated fields of their own.
> - bits<6> Imm;
> - bits<14> Label;
> -
> - let Inst{31} = Imm{5};
> - let Inst{30-25} = 0b011011;
> - let Inst{24} = op;
> - let Inst{23-19} = Imm{4-0};
> - let Inst{18-5} = Label;
> - // Inherit Rt in 4-0
> -}
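
The 6-bit tested-bit number is likewise split, with its top bit doing
double duty as the register-width selector in Inst{31}. A sketch of the
reassembly, mine rather than the tree's:

    #include <cstdint>

    unsigned tbBitNumber(uint32_t Inst) {
      return (((Inst >> 31) & 1) << 5)  // Imm{5}, Inst{31}
           | ((Inst >> 19) & 0x1f);     // Imm{4-0}, Inst{23-19}
    }

A bit number of 32 or above thus forces the 64-bit form of TBZ/TBNZ.
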
> -
> -// Format for Unconditional branch (register) instructions, including
> -// RET. Shares no fields with instructions further up the hierarchy, so
> -// it sits at the top level.
> -class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - // Doubly special in not even sharing register fields with other
> - // instructions, so we create our own Rn here.
> - bits<5> Rn;
> -
> - let Inst{31-25} = 0b1101011;
> - let Inst{24-21} = opc;
> - let Inst{20-16} = op2;
> - let Inst{15-10} = op3;
> - let Inst{9-5} = Rn;
> - let Inst{4-0} = op4;
> -}
> -
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// Neon Instruction Format Definitions.
> -//
> -
> -let Predicates = [HasNEON] in {
> -
> -class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
> - : InstAlias<Asm, Result, Emit> {
> -}
> -
> -// Format AdvSIMD bitwise extract
> -class NeonI_BitExtract<bit q, bits<2> op2,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-24} = 0b101110;
> - let Inst{23-22} = op2;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - // imm4 in 14-11
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD perm
> -class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-24} = 0b001110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - let Inst{14-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD table lookup
> -class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-24} = 0b001110;
> - let Inst{23-22} = op2;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - let Inst{14-13} = len;
> - let Inst{12} = op;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 3 vector registers with same vector type
> -class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 3 vector registers with different vector type
> -class NeonI_3VDiff<bit q, bit u, bits<2> size, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11} = 0b0;
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD two registers and an element
> -class NeonI_2VElem<bit q, bit u, bits<2> size, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01111;
> - let Inst{23-22} = size;
> - // l in Inst{21}
> - // m in Inst{20}
> - // Inherit Rm in 19-16
> - let Inst{15-12} = opcode;
> - // h in Inst{11}
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 1 vector register with modified immediate
> -class NeonI_1VModImm<bit q, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs,ins, asmstr, patterns, itin> {
> - bits<8> Imm;
> - bits<4> cmode;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = op;
> - let Inst{28-19} = 0b0111100000;
> - let Inst{15-12} = cmode;
> - let Inst{11} = 0b0; // o2
> - let Inst{10} = 1;
> - // Inherit Rd in 4-0
> - let Inst{18-16} = Imm{7-5}; // imm a:b:c
> - let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h
> -}
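
The a:b:c / d:e:f:g:h comments describe the standard AdvSIMD split of the
8-bit modified immediate. Spelled out, as my own restatement:

    #include <cstdint>

    void splitModImm(uint8_t Imm, unsigned &ABC, unsigned &DEFGH) {
      ABC   = (Imm >> 5) & 0x7;  // Inst{18-16}
      DEFGH = Imm & 0x1f;        // Inst{9-5}
    }

How the 8 bits then expand into a full vector constant is governed by the
cmode and op fields.
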
> -
> -// Format AdvSIMD 3 scalar registers with same type
> -
> -class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -
> -// Format AdvSIMD 2 vector registers miscellaneous
> -class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 2 vector 1 immediate shift
> -class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<7> Imm;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-23} = 0b011110;
> - let Inst{22-16} = Imm;
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD duplicate and insert
> -class NeonI_copy<bit q, bit op, bits<4> imm4,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Imm5;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = op;
> - let Inst{28-21} = 0b01110000;
> - let Inst{20-16} = Imm5;
> - let Inst{15} = 0b0;
> - let Inst{14-11} = imm4;
> - let Inst{10} = 0b1;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -// Format AdvSIMD insert from element to vector
> -class NeonI_insert<bit q, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Imm5;
> - bits<4> Imm4;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = op;
> - let Inst{28-21} = 0b01110000;
> - let Inst{20-16} = Imm5;
> - let Inst{15} = 0b0;
> - let Inst{14-11} = Imm4;
> - let Inst{10} = 0b1;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar pairwise
> -class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b11000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 2 vector across lanes
> -class NeonI_2VAcross<bit q, bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b11000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar two registers miscellaneous
> -class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD vector load/store multiple N-element structure
> -class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011000;
> - let Inst{22} = l;
> - let Inst{21-16} = 0b000000;
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = size;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD vector load/store multiple N-element structure (post-index)
> -class NeonI_LdStMult_Post<bit q, bit l, bits<4> opcode, bits<2> size,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtnm<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011001;
> - let Inst{22} = l;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = size;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD vector load Single N-element structure to all lanes
> -class NeonI_LdOne_Dup<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
> - dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011010;
> - let Inst{22} = 0b1;
> - let Inst{21} = r;
> - let Inst{20-16} = 0b00000;
> - let Inst{15-13} = opcode;
> - let Inst{12} = 0b0;
> - let Inst{11-10} = size;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD vector load/store Single N-element structure to/from one lane
> -class NeonI_LdStOne_Lane<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
> - dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin>
> -{
> - bits<4> lane;
> - let Inst{31} = 0b0;
> - let Inst{29-23} = 0b0011010;
> - let Inst{22} = l;
> - let Inst{21} = r;
> - let Inst{20-16} = 0b00000;
> - let Inst{15-14} = op2_1;
> - let Inst{13} = op0;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD post-index vector load Single N-element structure to all lanes
> -class NeonI_LdOne_Dup_Post<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
> - dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRtnm<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011011;
> - let Inst{22} = 0b1;
> - let Inst{21} = r;
> - // Inherit Rm in 20-16
> - let Inst{15-13} = opcode;
> - let Inst{12} = 0b0;
> - let Inst{11-10} = size;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD post-index vector load/store Single N-element structure
> -// to/from one lane
> -class NeonI_LdStOne_Lane_Post<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
> - dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtnm<outs, ins, asmstr, patterns, itin>
> -{
> - bits<4> lane;
> - let Inst{31} = 0b0;
> - let Inst{29-23} = 0b0011011;
> - let Inst{22} = l;
> - let Inst{21} = r;
> - // Inherit Rm in 20-16
> - let Inst{15-14} = op2_1;
> - let Inst{13} = op0;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD 3 scalar registers with different type
> -
> -class NeonI_Scalar3Diff<bit u, bits<2> size, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-30} = 0b01;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar shift by immediate
> -
> -class NeonI_ScalarShiftImm<bit u, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<4> Imm4;
> - bits<3> Imm3;
> - let Inst{31-30} = 0b01;
> - let Inst{29} = u;
> - let Inst{28-23} = 0b111110;
> - let Inst{22-19} = Imm4;
> - let Inst{18-16} = Imm3;
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD crypto AES
> -class NeonI_Crypto_AES<bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-24} = 0b01001110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10100;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD crypto SHA
> -class NeonI_Crypto_SHA<bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-24} = 0b01011110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10100;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD crypto 3V SHA
> -class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-24} = 0b01011110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - let Inst{14-12} = opcode;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar x indexed element
> -class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo,
> - bits<4> opcode, dag outs, dag ins,
> - string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11111;
> - let Inst{23} = szhi;
> - let Inst{22} = szlo;
> - // l in Inst{21}
> - // m in Instr{20}
> - // Inherit Rm in 19-16
> - let Inst{15-12} = opcode;
> - // h in Inst{11}
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -// Format AdvSIMD scalar copy - insert from element to scalar
> -class NeonI_ScalarCopy<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> {
> - let Inst{28} = 0b1;
> -}
> -}
> -
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (removed)
> @@ -1,979 +0,0 @@
> -//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the AArch64 implementation of the TargetInstrInfo class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64InstrInfo.h"
> -#include "AArch64MachineFunctionInfo.h"
> -#include "AArch64TargetMachine.h"
> -#include "MCTargetDesc/AArch64MCTargetDesc.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/CodeGen/MachineConstantPool.h"
> -#include "llvm/CodeGen/MachineDominators.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -#include "llvm/CodeGen/MachineFunctionPass.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/IR/Function.h"
> -#include "llvm/Support/ErrorHandling.h"
> -#include "llvm/Support/TargetRegistry.h"
> -#include <algorithm>
> -
> -using namespace llvm;
> -
> -#define GET_INSTRINFO_CTOR_DTOR
> -#include "AArch64GenInstrInfo.inc"
> -
> -AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
> - : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
> - Subtarget(STI) {}
> -
> -void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I, DebugLoc DL,
> - unsigned DestReg, unsigned SrcReg,
> - bool KillSrc) const {
> - unsigned Opc = 0;
> - unsigned ZeroReg = 0;
> - if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
> - // E.g. ADD xDst, xsp, #0 (, lsl #0)
> - BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
> - .addReg(SrcReg)
> - .addImm(0);
> - return;
> - } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
> - // E.g. ADD wDST, wsp, #0 (, lsl #0)
> - BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
> - .addReg(SrcReg)
> - .addImm(0);
> - return;
> - } else if (DestReg == AArch64::NZCV) {
> - assert(AArch64::GPR64RegClass.contains(SrcReg));
> - // E.g. MSR NZCV, xDST
> - BuildMI(MBB, I, DL, get(AArch64::MSRix))
> - .addImm(A64SysReg::NZCV)
> - .addReg(SrcReg);
> - } else if (SrcReg == AArch64::NZCV) {
> - assert(AArch64::GPR64RegClass.contains(DestReg));
> - // E.g. MRS xDST, NZCV
> - BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
> - .addImm(A64SysReg::NZCV);
> - } else if (AArch64::GPR64RegClass.contains(DestReg)) {
> - if (AArch64::GPR64RegClass.contains(SrcReg)) {
> - Opc = AArch64::ORRxxx_lsl;
> - ZeroReg = AArch64::XZR;
> - } else {
> - assert(AArch64::FPR64RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::GPR32RegClass.contains(DestReg)) {
> - if (AArch64::GPR32RegClass.contains(SrcReg)) {
> - Opc = AArch64::ORRwww_lsl;
> - ZeroReg = AArch64::WZR;
> - } else {
> - assert(AArch64::FPR32RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::FPR32RegClass.contains(DestReg)) {
> - if (AArch64::FPR32RegClass.contains(SrcReg)) {
> - BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
> - .addReg(SrcReg);
> - return;
> - } else {
> - assert(AArch64::GPR32RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::FPR64RegClass.contains(DestReg)) {
> - if (AArch64::FPR64RegClass.contains(SrcReg)) {
> - BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
> - .addReg(SrcReg);
> - return;
> - } else {
> - assert(AArch64::GPR64RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::FPR128RegClass.contains(DestReg)) {
> - assert(AArch64::FPR128RegClass.contains(SrcReg));
> -
> - // If NEON is enabled, use ORR to implement this copy.
> - // If NEON isn't available, emit an STR/LDR pair through the stack instead.
> - if (getSubTarget().hasNEON()) {
> - BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
> - .addReg(SrcReg)
> - .addReg(SrcReg);
> - return;
> - } else {
> - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
> - .addReg(SrcReg)
> - .addReg(AArch64::XSP)
> - .addImm(0x1ff & -16);
> -
> - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
> - .addReg(AArch64::XSP, RegState::Define)
> - .addReg(AArch64::XSP)
> - .addImm(16);
> - return;
> - }
> - } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) {
> - // A copy between two FPR8 registers is implemented as a copy between their
> - // FPR32 super-registers.
> - const TargetRegisterInfo *TRI = &getRegisterInfo();
> - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8,
> - &AArch64::FPR32RegClass);
> - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8,
> - &AArch64::FPR32RegClass);
> - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
> - .addReg(Src);
> - return;
> - } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
> - // A copy between two FPR16 registers is implemented as a copy between their
> - // FPR32 super-registers.
> - const TargetRegisterInfo *TRI = &getRegisterInfo();
> - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
> - &AArch64::FPR32RegClass);
> - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
> - &AArch64::FPR32RegClass);
> - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
> - .addReg(Src);
> - return;
> - } else {
> - CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
> - return;
> - }
> -
> - // E.g. ORR xDst, xzr, xSrc, lsl #0
> - BuildMI(MBB, I, DL, get(Opc), DestReg)
> - .addReg(ZeroReg)
> - .addReg(SrcReg)
> - .addImm(0);
> -}
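
The non-NEON FPR128 fallback above bounces the value through the stack with
a pre-indexed store and a post-indexed load. The "0x1ff & -16" immediate is
the signed 9-bit (imm9) two's-complement encoding of the -16 byte offset. A
standalone sketch of that masking (plain C++; encodeImm9 is a hypothetical
helper, not an LLVM API):

  #include <cassert>
  #include <cstdint>

  // Mask a byte offset down to the 9-bit field used by pre/post-indexed
  // loads and stores.
  uint32_t encodeImm9(int32_t Offset) {
    assert(Offset >= -256 && Offset <= 255 && "imm9 out of range");
    return static_cast<uint32_t>(Offset) & 0x1ff;
  }

  int main() {
    assert(encodeImm9(-16) == 0x1f0); // the same value as 0x1ff & -16
    return 0;
  }
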
> -
> -void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I,
> - DebugLoc DL, unsigned DestReg,
> - unsigned SrcReg) const {
> - unsigned SubRegs;
> - bool IsQRegs;
> - if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 2;
> - IsQRegs = false;
> - } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 3;
> - IsQRegs = false;
> - } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 4;
> - IsQRegs = false;
> - } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 2;
> - IsQRegs = true;
> - } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 3;
> - IsQRegs = true;
> - } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 4;
> - IsQRegs = true;
> - } else
> - llvm_unreachable("Unknown register class");
> -
> - unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0;
> - int Spacing = 1;
> - const TargetRegisterInfo *TRI = &getRegisterInfo();
> - // Copy register tuples backward when the first Dest reg overlaps
> - // with SrcReg.
> - if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
> - BeginIdx = BeginIdx + (SubRegs - 1);
> - Spacing = -1;
> - }
> -
> - unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B;
> - for (unsigned i = 0; i != SubRegs; ++i) {
> - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
> - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
> - assert(Dst && Src && "Bad sub-register");
> - BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
> - .addReg(Src)
> - .addReg(Src);
> - }
> - return;
> -}
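
The backward-copy rule in CopyPhysRegTuple is easiest to see with a concrete
overlap. A minimal sketch (plain C++, registers modelled as an array;
nothing here is LLVM API):

  #include <cassert>

  int main() {
    // Model D0..D3. Copy the tuple {D1,D2} (dest) from {D0,D1} (src):
    // dest's first lane D1 overlaps the source tuple.
    int R[4] = {10, 20, 30, 40};
    const int DstBase = 1, SrcBase = 0, SubRegs = 2;

    // Backward copy, as the code above does once it detects overlap.
    for (int i = SubRegs - 1; i >= 0; --i)
      R[DstBase + i] = R[SrcBase + i];
    assert(R[1] == 10 && R[2] == 20); // D1 = old D0, D2 = old D1

    // A forward copy would have written D1 before reading it,
    // leaving D2 == 10 instead of 20.
    return 0;
  }
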
> -
> -/// Does the Opcode represent a conditional branch that we can remove and re-add
> -/// at the end of a basic block?
> -static bool isCondBranch(unsigned Opc) {
> - return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
> - Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
> - Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
> - Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
> -}
> -
> -/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
> -/// setting TBB to the destination basic block and populating the Cond vector
> -/// with data necessary to recreate the conditional branch at a later
> -/// date. First element will be the opcode, and subsequent ones define the
> -/// conditions being branched on in an instruction-specific manner.
> -static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
> - SmallVectorImpl<MachineOperand> &Cond) {
> - switch (I->getOpcode()) {
> - case AArch64::Bcc:
> - case AArch64::CBZw:
> - case AArch64::CBZx:
> - case AArch64::CBNZw:
> - case AArch64::CBNZx:
> - // These instructions just have one predicate operand in position 0 (either
> - // a condition code or a register being compared).
> - Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
> - Cond.push_back(I->getOperand(0));
> - TBB = I->getOperand(1).getMBB();
> - return;
> - case AArch64::TBZwii:
> - case AArch64::TBZxii:
> - case AArch64::TBNZwii:
> - case AArch64::TBNZxii:
> - // These have two predicate operands: a register and a bit position.
> - Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
> - Cond.push_back(I->getOperand(0));
> - Cond.push_back(I->getOperand(1));
> - TBB = I->getOperand(2).getMBB();
> - return;
> - default:
> - llvm_unreachable("Unknown conditional branch to classify");
> - }
> -}
> -
> -
> -bool
> -AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
> - MachineBasicBlock *&FBB,
> - SmallVectorImpl<MachineOperand> &Cond,
> - bool AllowModify) const {
> - // If the block has no terminators, it just falls into the block after it.
> - MachineBasicBlock::iterator I = MBB.end();
> - if (I == MBB.begin())
> - return false;
> - --I;
> - while (I->isDebugValue()) {
> - if (I == MBB.begin())
> - return false;
> - --I;
> - }
> - if (!isUnpredicatedTerminator(I))
> - return false;
> -
> - // Get the last instruction in the block.
> - MachineInstr *LastInst = I;
> -
> - // If there is only one terminator instruction, process it.
> - unsigned LastOpc = LastInst->getOpcode();
> - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
> - if (LastOpc == AArch64::Bimm) {
> - TBB = LastInst->getOperand(0).getMBB();
> - return false;
> - }
> - if (isCondBranch(LastOpc)) {
> - classifyCondBranch(LastInst, TBB, Cond);
> - return false;
> - }
> - return true; // Can't handle indirect branch.
> - }
> -
> - // Get the instruction before it if it is a terminator.
> - MachineInstr *SecondLastInst = I;
> - unsigned SecondLastOpc = SecondLastInst->getOpcode();
> -
> - // If AllowModify is true and the block ends with two or more unconditional
> - // branches, delete all but the first unconditional branch.
> - if (AllowModify && LastOpc == AArch64::Bimm) {
> - while (SecondLastOpc == AArch64::Bimm) {
> - LastInst->eraseFromParent();
> - LastInst = SecondLastInst;
> - LastOpc = LastInst->getOpcode();
> - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
> - // Return now that the only terminator is an unconditional branch.
> - TBB = LastInst->getOperand(0).getMBB();
> - return false;
> - } else {
> - SecondLastInst = I;
> - SecondLastOpc = SecondLastInst->getOpcode();
> - }
> - }
> - }
> -
> - // If there are three terminators, we don't know what sort of block this is.
> - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
> - return true;
> -
> - // If the block ends with a B and a Bcc, handle it.
> - if (LastOpc == AArch64::Bimm) {
> - if (SecondLastOpc == AArch64::Bcc) {
> - TBB = SecondLastInst->getOperand(1).getMBB();
> - Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
> - Cond.push_back(SecondLastInst->getOperand(0));
> - FBB = LastInst->getOperand(0).getMBB();
> - return false;
> - } else if (isCondBranch(SecondLastOpc)) {
> - classifyCondBranch(SecondLastInst, TBB, Cond);
> - FBB = LastInst->getOperand(0).getMBB();
> - return false;
> - }
> - }
> -
> - // If the block ends with two unconditional branches, handle it. The second
> - // one is not executed, so remove it.
> - if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
> - TBB = SecondLastInst->getOperand(0).getMBB();
> - I = LastInst;
> - if (AllowModify)
> - I->eraseFromParent();
> - return false;
> - }
> -
> - // Otherwise, can't handle this.
> - return true;
> -}
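
For readers less familiar with AnalyzeBranch's contract, here is a compact
restatement of the block shapes the function accepts (a sketch inferred from
the code above, not an LLVM type):

  // Anything not matching one of these makes AnalyzeBranch return true
  // ("cannot analyze").
  enum class BlockShape {
    Fallthrough,    // no terminators: TBB and FBB stay null
    Uncond,         // "B dest":       TBB = dest, Cond empty
    Cond,           // "Bcc/CBZ/...":  TBB = dest, Cond filled in
    CondThenUncond, // "Bcc t; B f":   TBB = t, FBB = f, Cond filled in
  };
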
> -
> -bool AArch64InstrInfo::ReverseBranchCondition(
> - SmallVectorImpl<MachineOperand> &Cond) const {
> - switch (Cond[0].getImm()) {
> - case AArch64::Bcc: {
> - A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
> - CC = A64InvertCondCode(CC);
> - Cond[1].setImm(CC);
> - return false;
> - }
> - case AArch64::CBZw:
> - Cond[0].setImm(AArch64::CBNZw);
> - return false;
> - case AArch64::CBZx:
> - Cond[0].setImm(AArch64::CBNZx);
> - return false;
> - case AArch64::CBNZw:
> - Cond[0].setImm(AArch64::CBZw);
> - return false;
> - case AArch64::CBNZx:
> - Cond[0].setImm(AArch64::CBZx);
> - return false;
> - case AArch64::TBZwii:
> - Cond[0].setImm(AArch64::TBNZwii);
> - return false;
> - case AArch64::TBZxii:
> - Cond[0].setImm(AArch64::TBNZxii);
> - return false;
> - case AArch64::TBNZwii:
> - Cond[0].setImm(AArch64::TBZwii);
> - return false;
> - case AArch64::TBNZxii:
> - Cond[0].setImm(AArch64::TBZxii);
> - return false;
> - default:
> - llvm_unreachable("Unknown branch type");
> - }
> -}
> -
> -
> -unsigned
> -AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
> - MachineBasicBlock *FBB,
> - const SmallVectorImpl<MachineOperand> &Cond,
> - DebugLoc DL) const {
> - if (!FBB && Cond.empty()) {
> - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
> - return 1;
> - } else if (!FBB) {
> - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
> - for (int i = 1, e = Cond.size(); i != e; ++i)
> - MIB.addOperand(Cond[i]);
> - MIB.addMBB(TBB);
> - return 1;
> - }
> -
> - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
> - for (int i = 1, e = Cond.size(); i != e; ++i)
> - MIB.addOperand(Cond[i]);
> - MIB.addMBB(TBB);
> -
> - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
> - return 2;
> -}
> -
> -unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
> - MachineBasicBlock::iterator I = MBB.end();
> - if (I == MBB.begin()) return 0;
> - --I;
> - while (I->isDebugValue()) {
> - if (I == MBB.begin())
> - return 0;
> - --I;
> - }
> - if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
> - return 0;
> -
> - // Remove the branch.
> - I->eraseFromParent();
> -
> - I = MBB.end();
> -
> - if (I == MBB.begin()) return 1;
> - --I;
> - if (!isCondBranch(I->getOpcode()))
> - return 1;
> -
> - // Remove the branch.
> - I->eraseFromParent();
> - return 2;
> -}
> -
> -bool
> -AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
> - MachineInstr &MI = *MBBI;
> - MachineBasicBlock &MBB = *MI.getParent();
> -
> - unsigned Opcode = MI.getOpcode();
> - switch (Opcode) {
> - case AArch64::TLSDESC_BLRx: {
> - MachineInstr *NewMI =
> - BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
> - .addOperand(MI.getOperand(1));
> - MI.setDesc(get(AArch64::BLRx));
> -
> - llvm::finalizeBundle(MBB, NewMI, *++MBBI);
> - return true;
> - }
> - default:
> - return false;
> - }
> -
> - return false;
> -}
> -
> -void
> -AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - unsigned SrcReg, bool isKill,
> - int FrameIdx,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const {
> - DebugLoc DL = MBB.findDebugLoc(MBBI);
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - unsigned Align = MFI.getObjectAlignment(FrameIdx);
> -
> - MachineMemOperand *MMO
> - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
> - MachineMemOperand::MOStore,
> - MFI.getObjectSize(FrameIdx),
> - Align);
> -
> - unsigned StoreOp = 0;
> - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
> - switch (RC->getSize()) {
> - case 4: StoreOp = AArch64::LS32_STR; break;
> - case 8: StoreOp = AArch64::LS64_STR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
> - StoreOp = AArch64::LSFP8_STR;
> - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
> - StoreOp = AArch64::LSFP16_STR;
> - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
> - RC->hasType(MVT::f128)) {
> - switch (RC->getSize()) {
> - case 4: StoreOp = AArch64::LSFP32_STR; break;
> - case 8: StoreOp = AArch64::LSFP64_STR; break;
> - case 16: StoreOp = AArch64::LSFP128_STR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else { // For a super-register class with more than one sub-register.
> - if (AArch64::DPairRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x2_8B;
> - else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x3_8B;
> - else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x4_8B;
> - else if (AArch64::QPairRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x2_16B;
> - else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x3_16B;
> - else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x4_16B;
> - else
> - llvm_unreachable("Unknown reg class");
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
> - // Vector stores take different operands from other store instructions.
> - NewMI.addFrameIndex(FrameIdx)
> - .addReg(SrcReg, getKillRegState(isKill))
> - .addMemOperand(MMO);
> - return;
> - }
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
> - NewMI.addReg(SrcReg, getKillRegState(isKill))
> - .addFrameIndex(FrameIdx)
> - .addImm(0)
> - .addMemOperand(MMO);
> -
> -}
> -
> -void
> -AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - unsigned DestReg, int FrameIdx,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const {
> - DebugLoc DL = MBB.findDebugLoc(MBBI);
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - unsigned Align = MFI.getObjectAlignment(FrameIdx);
> -
> - MachineMemOperand *MMO
> - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
> - MachineMemOperand::MOLoad,
> - MFI.getObjectSize(FrameIdx),
> - Align);
> -
> - unsigned LoadOp = 0;
> - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
> - switch (RC->getSize()) {
> - case 4: LoadOp = AArch64::LS32_LDR; break;
> - case 8: LoadOp = AArch64::LS64_LDR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
> - LoadOp = AArch64::LSFP8_LDR;
> - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
> - LoadOp = AArch64::LSFP16_LDR;
> - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
> - RC->hasType(MVT::f128)) {
> - switch (RC->getSize()) {
> - case 4: LoadOp = AArch64::LSFP32_LDR; break;
> - case 8: LoadOp = AArch64::LSFP64_LDR; break;
> - case 16: LoadOp = AArch64::LSFP128_LDR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else { // For a super-register class with more than one sub-register.
> - if (AArch64::DPairRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x2_8B;
> - else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x3_8B;
> - else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x4_8B;
> - else if (AArch64::QPairRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x2_16B;
> - else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x3_16B;
> - else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x4_16B;
> - else
> - llvm_unreachable("Unknown reg class");
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
> - // Vector loads take different operands from other load instructions.
> - NewMI.addFrameIndex(FrameIdx)
> - .addMemOperand(MMO);
> - return;
> - }
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
> - NewMI.addFrameIndex(FrameIdx)
> - .addImm(0)
> - .addMemOperand(MMO);
> -}
> -
> -unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
> - unsigned Limit = (1 << 16) - 1;
> - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
> - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
> - I != E; ++I) {
> - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
> - if (!I->getOperand(i).isFI()) continue;
> -
> - // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
> - // is the largest offset guaranteed to fit in the immediate offset.
> - if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
> - Limit = std::min(Limit, 0xfffu);
> - break;
> - }
> -
> - int AccessScale, MinOffset, MaxOffset;
> - getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
> - Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
> -
> - break; // At most one FI per instruction
> - }
> - }
> - }
> -
> - return Limit;
> -}
> -
> -void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
> - int &AccessScale, int &MinOffset,
> - int &MaxOffset) const {
> - switch (MI.getOpcode()) {
> - default:
> - llvm_unreachable("Unknown load/store kind");
> - case TargetOpcode::DBG_VALUE:
> - AccessScale = 1;
> - MinOffset = INT_MIN;
> - MaxOffset = INT_MAX;
> - return;
> - case AArch64::LS8_LDR: case AArch64::LS8_STR:
> - case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
> - case AArch64::LDRSBw:
> - case AArch64::LDRSBx:
> - AccessScale = 1;
> - MinOffset = 0;
> - MaxOffset = 0xfff;
> - return;
> - case AArch64::LS16_LDR: case AArch64::LS16_STR:
> - case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
> - case AArch64::LDRSHw:
> - case AArch64::LDRSHx:
> - AccessScale = 2;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LS32_LDR: case AArch64::LS32_STR:
> - case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
> - case AArch64::LDRSWx:
> - case AArch64::LDPSWx:
> - AccessScale = 4;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LS64_LDR: case AArch64::LS64_STR:
> - case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
> - case AArch64::PRFM:
> - AccessScale = 8;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
> - AccessScale = 16;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
> - case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
> - AccessScale = 4;
> - MinOffset = -0x40 * AccessScale;
> - MaxOffset = 0x3f * AccessScale;
> - return;
> - case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
> - case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
> - AccessScale = 8;
> - MinOffset = -0x40 * AccessScale;
> - MaxOffset = 0x3f * AccessScale;
> - return;
> - case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
> - AccessScale = 16;
> - MinOffset = -0x40 * AccessScale;
> - MaxOffset = 0x3f * AccessScale;
> - return;
> - case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
> - AccessScale = 16;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
> - AccessScale = 24;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
> - case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
> - AccessScale = 32;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
> - AccessScale = 48;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
> - AccessScale = 64;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - }
> -}
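
Putting the two halves together: an immediate offset is legal for one of
these instructions iff it lies in [MinOffset, MaxOffset] and is a multiple
of AccessScale. A standalone check of that contract (plain C++;
isLegalOffset is illustrative, not an LLVM API):

  #include <cassert>

  bool isLegalOffset(int Imm, int AccessScale, int MinOffset, int MaxOffset) {
    return Imm >= MinOffset && Imm <= MaxOffset && Imm % AccessScale == 0;
  }

  int main() {
    // 64-bit LDR/STR: scale 8, unsigned scaled imm12, so 0..0xfff*8.
    assert(isLegalOffset(0xfff * 8, 8, 0, 0xfff * 8)); // 32760: max legal
    assert(!isLegalOffset(4, 8, 0, 0xfff * 8));        // not 8-aligned
    // 64-bit LDP/STP: scale 8, signed imm7, so -0x40*8 .. 0x3f*8.
    assert(isLegalOffset(-0x40 * 8, 8, -0x40 * 8, 0x3f * 8));
    assert(!isLegalOffset(0x40 * 8, 8, -0x40 * 8, 0x3f * 8)); // too big
    return 0;
  }
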
> -
> -unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
> - const MCInstrDesc &MCID = MI.getDesc();
> - const MachineBasicBlock &MBB = *MI.getParent();
> - const MachineFunction &MF = *MBB.getParent();
> - const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
> -
> - if (MCID.getSize())
> - return MCID.getSize();
> -
> - if (MI.getOpcode() == AArch64::INLINEASM)
> - return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
> -
> - switch (MI.getOpcode()) {
> - case TargetOpcode::BUNDLE:
> - return getInstBundleLength(MI);
> - case TargetOpcode::IMPLICIT_DEF:
> - case TargetOpcode::KILL:
> - case TargetOpcode::CFI_INSTRUCTION:
> - case TargetOpcode::EH_LABEL:
> - case TargetOpcode::GC_LABEL:
> - case TargetOpcode::DBG_VALUE:
> - case AArch64::TLSDESCCALL:
> - return 0;
> - default:
> - llvm_unreachable("Unknown instruction class");
> - }
> -}
> -
> -unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
> - unsigned Size = 0;
> - MachineBasicBlock::const_instr_iterator I = MI;
> - MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
> - while (++I != E && I->isInsideBundle()) {
> - assert(!I->isBundle() && "No nested bundle!");
> - Size += getInstSizeInBytes(*I);
> - }
> - return Size;
> -}
> -
> -bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
> - unsigned FrameReg, int &Offset,
> - const AArch64InstrInfo &TII) {
> - MachineBasicBlock &MBB = *MI.getParent();
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> -
> - MFI.getObjectOffset(FrameRegIdx);
> - llvm_unreachable("Unimplemented rewriteFrameIndex");
> -}
> -
> -void llvm::emitRegUpdate(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
> - int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
> - if (NumBytes == 0 && DstReg == SrcReg)
> - return;
> -
> - if (abs64(NumBytes) & ~0xffffff) {
> - // Generically, we have to materialize the offset into a temporary register
> - // and subtract it. There are a couple of ways this could be done; for now,
> - // we'll use a movz/movk or movn/movk sequence.
> - uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
> - .addImm(0xffff & Bits).addImm(0)
> - .setMIFlags(MIFlags);
> -
> - Bits >>= 16;
> - if (Bits & 0xffff) {
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
> - .addReg(ScratchReg)
> - .addImm(0xffff & Bits).addImm(1)
> - .setMIFlags(MIFlags);
> - }
> -
> - Bits >>= 16;
> - if (Bits & 0xffff) {
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
> - .addReg(ScratchReg)
> - .addImm(0xffff & Bits).addImm(2)
> - .setMIFlags(MIFlags);
> - }
> -
> - Bits >>= 16;
> - if (Bits & 0xffff) {
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
> - .addReg(ScratchReg)
> - .addImm(0xffff & Bits).addImm(3)
> - .setMIFlags(MIFlags);
> - }
> -
> - // ADD DST, SRC, xTMP (, lsl #0)
> - unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
> - BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
> - .addReg(SrcReg, RegState::Kill)
> - .addReg(ScratchReg, RegState::Kill)
> - .addImm(0)
> - .setMIFlag(MIFlags);
> - return;
> - }
> -
> - // Now we know that the adjustment can be done in at most two add/sub
> - // (immediate) instructions, which is always more efficient than a
> - // literal-pool load, or even a hypothetical movz/movk/add sequence.
> -
> - // Decide whether we're doing addition or subtraction.
> - unsigned LowOp, HighOp;
> - if (NumBytes >= 0) {
> - LowOp = AArch64::ADDxxi_lsl0_s;
> - HighOp = AArch64::ADDxxi_lsl12_s;
> - } else {
> - LowOp = AArch64::SUBxxi_lsl0_s;
> - HighOp = AArch64::SUBxxi_lsl12_s;
> - NumBytes = abs64(NumBytes);
> - }
> -
> - // If we're here, at the very least a move needs to be produced, which just
> - // happens to be materializable by an ADD.
> - if ((NumBytes & 0xfff) || NumBytes == 0) {
> - BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
> - .addReg(SrcReg, RegState::Kill)
> - .addImm(NumBytes & 0xfff)
> - .setMIFlag(MIFlags);
> -
> - // Next update should use the register we've just defined.
> - SrcReg = DstReg;
> - }
> -
> - if (NumBytes & 0xfff000) {
> - BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
> - .addReg(SrcReg, RegState::Kill)
> - .addImm(NumBytes >> 12)
> - .setMIFlag(MIFlags);
> - }
> -}
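
The "at most two add/sub (immediate)" claim follows from the A64 encoding:
each ADD/SUB takes a 12-bit immediate, optionally shifted left by 12, so any
adjustment below 2^24 splits into a low and a high part. A quick sanity
check of that split (plain C++, mirroring the masks used above):

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t NumBytes = 0x12345;                  // fits in 24 bits
    int64_t Low = NumBytes & 0xfff;              // 0x345 -> "lsl #0" op
    int64_t High = (NumBytes & 0xfff000) >> 12;  // 0x12  -> "lsl #12" op
    assert((High << 12) + Low == NumBytes);
    return 0;
  }
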
> -
> -void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned ScratchReg, int64_t NumBytes,
> - MachineInstr::MIFlag MIFlags) {
> - emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
> - NumBytes, MIFlags);
> -}
> -
> -
> -namespace {
> - struct LDTLSCleanup : public MachineFunctionPass {
> - static char ID;
> - LDTLSCleanup() : MachineFunctionPass(ID) {}
> -
> - bool runOnMachineFunction(MachineFunction &MF) override {
> - AArch64MachineFunctionInfo *MFI =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> - if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
> - // No point folding accesses if there aren't at least two.
> - return false;
> - }
> -
> - MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
> - return VisitNode(DT->getRootNode(), 0);
> - }
> -
> - // Visit the dominator subtree rooted at Node in pre-order.
> - // If TLSBaseAddrReg is non-null, then use that to replace any
> - // TLS_base_addr instructions. Otherwise, create the register
> - // when the first such instruction is seen, and then use it
> - // as we encounter more instructions.
> - bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
> - MachineBasicBlock *BB = Node->getBlock();
> - bool Changed = false;
> -
> - // Traverse the current block.
> - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
> - ++I) {
> - switch (I->getOpcode()) {
> - case AArch64::TLSDESC_BLRx:
> - // Make sure it's a local dynamic access.
> - if (!I->getOperand(1).isSymbol() ||
> - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
> - break;
> -
> - if (TLSBaseAddrReg)
> - I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
> - else
> - I = SetRegister(I, &TLSBaseAddrReg);
> - Changed = true;
> - break;
> - default:
> - break;
> - }
> - }
> -
> - // Visit the children of this block in the dominator tree.
> - for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
> - I != E; ++I) {
> - Changed |= VisitNode(*I, TLSBaseAddrReg);
> - }
> -
> - return Changed;
> - }
> -
> - // Replace the TLS_base_addr instruction I with a copy from
> - // TLSBaseAddrReg, returning the new instruction.
> - MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
> - unsigned TLSBaseAddrReg) {
> - MachineFunction *MF = I->getParent()->getParent();
> - const AArch64TargetMachine *TM =
> - static_cast<const AArch64TargetMachine *>(&MF->getTarget());
> - const AArch64InstrInfo *TII = TM->getInstrInfo();
> -
> - // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
> - // code sequence assumes the address will be.
> - MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
> - TII->get(TargetOpcode::COPY),
> - AArch64::X0)
> - .addReg(TLSBaseAddrReg);
> -
> - // Erase the TLS_base_addr instruction.
> - I->eraseFromParent();
> -
> - return Copy;
> - }
> -
> - // Create a virtual register in *TLSBaseAddrReg, and populate it by
> - // inserting a copy instruction after I. Returns the new instruction.
> - MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
> - MachineFunction *MF = I->getParent()->getParent();
> - const AArch64TargetMachine *TM =
> - static_cast<const AArch64TargetMachine *>(&MF->getTarget());
> - const AArch64InstrInfo *TII = TM->getInstrInfo();
> -
> - // Create a virtual register for the TLS base address.
> - MachineRegisterInfo &RegInfo = MF->getRegInfo();
> - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
> -
> - // Insert a copy from X0 to TLSBaseAddrReg for later.
> - MachineInstr *Next = I->getNextNode();
> - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
> - TII->get(TargetOpcode::COPY),
> - *TLSBaseAddrReg)
> - .addReg(AArch64::X0);
> -
> - return Copy;
> - }
> -
> - const char *getPassName() const override {
> - return "Local Dynamic TLS Access Clean-up";
> - }
> -
> - void getAnalysisUsage(AnalysisUsage &AU) const override {
> - AU.setPreservesCFG();
> - AU.addRequired<MachineDominatorTree>();
> - MachineFunctionPass::getAnalysisUsage(AU);
> - }
> - };
> -}
> -
> -char LDTLSCleanup::ID = 0;
> -FunctionPass*
> -llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
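
For context, the source pattern this pass targets looks something like the
snippet below (illustrative only): with local-dynamic TLS, every access
needs the module's TLS base, and without the cleanup each one re-runs the
TLSDESC call sequence. The pass keeps the first call's result (the
_TLS_MODULE_BASE_ address) in a virtual register and turns later calls into
plain copies.

  static thread_local int A;
  static thread_local int B;

  // Two TLS accesses, but only one base-address computation is needed.
  int sum() { return A + B; }
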
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (removed)
> @@ -1,112 +0,0 @@
> -//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the AArch64 implementation of the TargetInstrInfo class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
> -#define LLVM_TARGET_AARCH64INSTRINFO_H
> -
> -#include "AArch64RegisterInfo.h"
> -#include "llvm/Target/TargetInstrInfo.h"
> -
> -#define GET_INSTRINFO_HEADER
> -#include "AArch64GenInstrInfo.inc"
> -
> -namespace llvm {
> -
> -class AArch64Subtarget;
> -
> -class AArch64InstrInfo : public AArch64GenInstrInfo {
> - const AArch64RegisterInfo RI;
> - const AArch64Subtarget &Subtarget;
> -public:
> - explicit AArch64InstrInfo(const AArch64Subtarget &STI);
> -
> - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
> - /// such, whenever a client has an instance of instruction info, it should
> - /// always be able to get register info as well (through this method).
> - ///
> - const TargetRegisterInfo &getRegisterInfo() const { return RI; }
> -
> - const AArch64Subtarget &getSubTarget() const { return Subtarget; }
> -
> - void copyPhysReg(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I, DebugLoc DL,
> - unsigned DestReg, unsigned SrcReg,
> - bool KillSrc) const override;
> - void CopyPhysRegTuple(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I, DebugLoc DL,
> - unsigned DestReg, unsigned SrcReg) const;
> -
> - void storeRegToStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - unsigned SrcReg, bool isKill, int FrameIndex,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const override;
> - void loadRegFromStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - unsigned DestReg, int FrameIdx,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const override;
> -
> - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
> - MachineBasicBlock *&FBB,
> - SmallVectorImpl<MachineOperand> &Cond,
> - bool AllowModify = false) const override;
> - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
> - MachineBasicBlock *FBB,
> - const SmallVectorImpl<MachineOperand> &Cond,
> - DebugLoc DL) const override;
> - unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
> - bool
> - ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
> -
> - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
> -
> - /// Look through the instructions in this function and work out the largest
> - /// the stack frame can be while maintaining the ability to address local
> - /// slots with no complexities.
> - unsigned estimateRSStackLimit(MachineFunction &MF) const;
> -
> - /// getAddressConstraints - For loads and stores (and PRFMs) taking an
> - /// immediate offset, this function determines the constraints required for
> - /// the immediate. It must satisfy:
> - /// + MinOffset <= imm <= MaxOffset
> - /// + imm % AccessScale == 0
> - void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
> - int &MinOffset, int &MaxOffset) const;
> -
> -
> - unsigned getInstSizeInBytes(const MachineInstr &MI) const;
> -
> - unsigned getInstBundleLength(const MachineInstr &MI) const;
> -
> -};
> -
> -bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
> - unsigned FrameReg, int &Offset,
> - const AArch64InstrInfo &TII);
> -
> -
> -void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
> - int64_t NumBytes,
> - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
> -
> -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned ScratchReg, int64_t NumBytes,
> - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
> -
> -}
> -
> -#endif
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (removed)
> @@ -1,5388 +0,0 @@
> -//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file describes the AArch64 scalar instructions in TableGen format.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 Instruction Predicate Definitions.
> -//
> -def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
> - AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
> -def HasNEON : Predicate<"Subtarget->hasNEON()">,
> - AssemblerPredicate<"FeatureNEON", "neon">;
> -def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
> - AssemblerPredicate<"FeatureCrypto","crypto">;
> -
> -// Use fused MAC if more precision in FP computation is allowed.
> -def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
> - " FPOpFusion::Fast)">;
> -include "AArch64InstrFormats.td"
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 specific pattern fragments.
> -//
> -// An 'fmul' node with a single use.
> -def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
> - return N->hasOneUse();
> -}]>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Target-specific ISD nodes and profiles
> -//===----------------------------------------------------------------------===//
> -
> -def SDT_A64ret : SDTypeProfile<0, 0, []>;
> -def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
> - SDNPOptInGlue,
> - SDNPVariadic]>;
> -
> -// (ins NZCV, Condition, Dest)
> -def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
> -def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
> -
> -// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
> -def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
> - SDTCisSameAs<0, 2>,
> - SDTCisSameAs<2, 3>]>;
> -def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
> -
> -// (outs NZCV), (ins LHS, RHS, Condition)
> -def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
> - SDTCisSameAs<1, 2>]>;
> -def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
> -
> -
> -// (outs GPR64), (ins)
> -def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
> -
> -// A64 compares don't really care about the condition (they set all flags), so
> -// a simple binary operator is useful.
> -def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64setcc node:$lhs, node:$rhs, cond)>;
> -
> -
> -// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
> -// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
> -// and V flags can be set differently by this operation. It comes down to
> -// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
> -// then everything is fine. If not then the optimization is wrong. Thus general
> -// comparisons are only valid if op2 != 0.
> -
> -// So, finally, the only LLVM-native comparisons that don't mention C and V are
> -// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
> -// absence of information about op2.
> -def equality_cond : PatLeaf<(cond), [{
> - return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
> -}]>;
> -
> -def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
> -
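
A standalone illustration of the caveat above (plain C++ on 32-bit values):
"cmp a, neg(b)" and "cmn a, b" always agree on the result bits, hence on Z
and N and on equality, but negation wraps at INT_MIN, which is where C and V
can diverge.

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t a = 0, b = 0x80000000u; // b has INT32_MIN's bit pattern
    uint32_t negb = 0u - b;          // two's-complement negation wraps...
    assert(negb == b);               // ...so -INT_MIN == INT_MIN here
    // The subtraction and addition still produce identical result bits,
    // which is why SETEQ/SETNE remain safe to select to CMN.
    assert(a - negb == a + b);
    return 0;
  }
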
> -// There are two layers of indirection here, driven by the following
> -// considerations.
> -// + TableGen does not know CodeModel or Reloc, so that decision should be
> -// made for a variable/address at ISelLowering.
> -// + The output of ISelLowering should be selectable (hence the Wrapper,
> -// rather than a bare target opcode)
> -def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
> - SDTCisSameAs<0, 2>,
> - SDTCisSameAs<0, 3>,
> - SDTCisSameAs<0, 4>,
> - SDTCisPtrTy<0>]>;
> -
> -def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>;
> -
> -def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
> - SDTCisSameAs<1, 2>,
> - SDTCisVT<3, i32>,
> - SDTCisPtrTy<0>]>;
> -
> -def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>;
> -
> -
> -def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
> -def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
> - [SDNPHasChain]>;
> -
> -
> -// (A64BFI LHS, RHS, LSB, Width)
> -def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
> - SDTCisSameAs<1, 2>,
> - SDTCisVT<3, i64>,
> - SDTCisVT<4, i64>]>;
> -
> -def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
> -
> -// (A64EXTR HiReg, LoReg, LSB)
> -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
> - SDTCisVT<3, i64>]>;
> -def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
> -
> -// (A64[SU]BFX Field, ImmR, ImmS).
> -//
> -// Note that ImmR and ImmS are already encoded for the actual instructions. The
> -// more natural LSB and Width mix together to form ImmR and ImmS, something
> -// which TableGen can't handle.
> -def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
> -def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
> -
> -def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
> -
> -class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
> -
> -//===----------------------------------------------------------------------===//
> -// Call sequence pseudo-instructions
> -//===----------------------------------------------------------------------===//
> -
> -
> -def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
> -def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
> - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
> -
> -def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
> - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
> -
> -// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
> -// destination but needs a relocation against a fixed symbol. As such it has two
> -// fixed operands: the callee and the relocated variable.
> -//
> -// The TLS ABI only allows it to be selected to a BLR instruction (with
> -// appropriate relocation).
> -def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
> -
> -def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
> - [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
> - SDNPVariadic]>;
> -
> -
> -def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
> -def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
> - [SDNPHasChain, SDNPOutGlue]>;
> -
> -def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
> -def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
> - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
> -
> -
> -
> -// These pseudo-instructions have special semantics by virtue of being passed to
> -// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
> -// LowerCall to (in our case) tell the back-end about stack adjustments for
> -// arguments passed on the stack. Here we select those markers to
> -// pseudo-instructions which explicitly set the stack, and finally in the
> -// RegisterInfo we convert them to a true stack adjustment.
> -let Defs = [XSP], Uses = [XSP] in {
> - def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
> - [(AArch64callseq_start timm:$amt)]>;
> -
> - def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
> - [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Atomic operation pseudo-instructions
> -//===----------------------------------------------------------------------===//
> -
> -// These get selected from C++ code as a pretty much direct translation from the
> -// generic DAG nodes. The one exception is that the AtomicOrdering is added
> -// as an operand so that the eventual lowering can make use of it and choose
> -// acquire/release operations when required.
> -
> -let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
> -multiclass AtomicSizes {
> - def _I8 : PseudoInst<(outs GPR32:$dst),
> - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
> - def _I16 : PseudoInst<(outs GPR32:$dst),
> - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
> - def _I32 : PseudoInst<(outs GPR32:$dst),
> - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
> - def _I64 : PseudoInst<(outs GPR64:$dst),
> - (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
> -}
> -}
> -
> -defm ATOMIC_LOAD_ADD : AtomicSizes;
> -defm ATOMIC_LOAD_SUB : AtomicSizes;
> -defm ATOMIC_LOAD_AND : AtomicSizes;
> -defm ATOMIC_LOAD_OR : AtomicSizes;
> -defm ATOMIC_LOAD_XOR : AtomicSizes;
> -defm ATOMIC_LOAD_NAND : AtomicSizes;
> -defm ATOMIC_SWAP : AtomicSizes;
> -let Defs = [NZCV] in {
> - // These operations need a CMP to calculate the correct value
> - defm ATOMIC_LOAD_MIN : AtomicSizes;
> - defm ATOMIC_LOAD_MAX : AtomicSizes;
> - defm ATOMIC_LOAD_UMIN : AtomicSizes;
> - defm ATOMIC_LOAD_UMAX : AtomicSizes;
> -}
> -
> -class AtomicCmpSwap<RegisterClass GPRData>
> - : PseudoInst<(outs GPRData:$dst),
> - (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
> - i32imm:$ordering), []> {
> - let usesCustomInserter = 1;
> - let hasCtrlDep = 1;
> - let mayLoad = 1;
> - let mayStore = 1;
> - let Defs = [NZCV];
> -}
> -
> -def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
> -def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
> -def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
> -def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
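
Source-level operations like the one below (illustrative only) are what end
up as these pseudo-instructions; the memory order travels along as the extra
i32imm:$ordering operand so the custom inserter can pick acquire/release
variants of the exclusive load/store loop.

  #include <atomic>

  int bump(std::atomic<int> &Counter) {
    // Plausibly selects to ATOMIC_LOAD_ADD_I32 with an acquire-release
    // ordering operand on this backend.
    return Counter.fetch_add(1, std::memory_order_acq_rel);
  }
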
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (extended register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
> -
> -// The RHS of these operations is conceptually a sign/zero-extended
> -// register, optionally shifted left by 1-4. The extension can be a
> -// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
> -// must be specified with one exception:
> -
> -// If one of the registers is sp/wsp then LSL is an alias for UXTW in
> -// 32-bit instructions and UXTX in 64-bit versions, the shift amount
> -// is not optional in that case (but can explicitly be 0), and the
> -// entire suffix can be skipped (e.g. "add sp, x3, x2").
> -
> -multiclass extend_operands<string PREFIX, string Diag> {
> - def _asmoperand : AsmOperandClass {
> - let Name = PREFIX;
> - let RenderMethod = "addRegExtendOperands";
> - let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
> - let DiagnosticType = "AddSubRegExtend" # Diag;
> - }
> -
> - def _operand : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
> - let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
> - let DecoderMethod = "DecodeRegExtendOperand";
> - let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
> - }
> -}
> -
> -defm UXTB : extend_operands<"UXTB", "Small">;
> -defm UXTH : extend_operands<"UXTH", "Small">;
> -defm UXTW : extend_operands<"UXTW", "Small">;
> -defm UXTX : extend_operands<"UXTX", "Large">;
> -defm SXTB : extend_operands<"SXTB", "Small">;
> -defm SXTH : extend_operands<"SXTH", "Small">;
> -defm SXTW : extend_operands<"SXTW", "Small">;
> -defm SXTX : extend_operands<"SXTX", "Large">;
> -
> -def LSL_extasmoperand : AsmOperandClass {
> - let Name = "RegExtendLSL";
> - let RenderMethod = "addRegExtendOperands";
> - let DiagnosticType = "AddSubRegExtendLarge";
> -}
> -
> -def LSL_extoperand : Operand<i64> {
> - let ParserMatchClass = LSL_extasmoperand;
> -}
> -
> -
> -// The patterns for various sign-extensions are a little ugly and
> -// non-uniform because everything has already been promoted to the
> -// legal i64 and i32 types. We'll wrap the various variants up in a
> -// class for use later.
> -class extend_types {
> - dag uxtb; dag uxth; dag uxtw; dag uxtx;
> - dag sxtb; dag sxth; dag sxtw; dag sxtx;
> - ValueType ty;
> - RegisterClass GPR;
> -}
> -
> -def extends_to_i64 : extend_types {
> - let uxtb = (and (anyext i32:$Rm), 255);
> - let uxth = (and (anyext i32:$Rm), 65535);
> - let uxtw = (zext i32:$Rm);
> - let uxtx = (i64 $Rm);
> -
> - let sxtb = (sext_inreg (anyext i32:$Rm), i8);
> - let sxth = (sext_inreg (anyext i32:$Rm), i16);
> - let sxtw = (sext i32:$Rm);
> - let sxtx = (i64 $Rm);
> -
> - let ty = i64;
> - let GPR = GPR64xsp;
> -}
> -
> -
> -def extends_to_i32 : extend_types {
> - let uxtb = (and i32:$Rm, 255);
> - let uxth = (and i32:$Rm, 65535);
> - let uxtw = (i32 i32:$Rm);
> - let uxtx = (i32 i32:$Rm);
> -
> - let sxtb = (sext_inreg i32:$Rm, i8);
> - let sxth = (sext_inreg i32:$Rm, i16);
> - let sxtw = (i32 i32:$Rm);
> - let sxtx = (i32 i32:$Rm);
> -
> - let ty = i32;
> - let GPR = GPR32wsp;
> -}
> -
> -// Now, six of the extensions supported are easy and uniform: if the source size
> -// is 32 bits or less, then Rm is always a 32-bit register. We'll instantiate
> -// those instructions in one block.
> -
> -// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
> -// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
> -// be impossible.
> -// + Patterns are very different as well.
> -// + Passing different registers would be ugly (more fields in extend_types
> -// would probably be the best option).
> -multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
> - SDPatternOperator opfrag,
> - dag outs, extend_types exts> {
> - def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -// These two could be merged in with the above, but their patterns aren't really
> -// necessary and the naming scheme would necessarily break:
> -multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
> - dag outs> {
> - def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
> - outs,
> - (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
> - outs,
> - (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [/* No Pattern: same as uxtx */],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
> - def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
> - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [/* No pattern: probably same as uxtw */],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
> - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [/* No Pattern: probably same as uxtw */],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -class SetRD<RegisterClass RC, SDPatternOperator op>
> - : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
> -class SetNZCV<SDPatternOperator op>
> - : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
> -
> -defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
> - (outs GPR64xsp:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
> - (outs GPR64xsp:$Rd)>;
> -defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
> - (outs GPR32wsp:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
> - (outs GPR32wsp:$Rd)>;
> -defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
> - (outs GPR64xsp:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
> - (outs GPR64xsp:$Rd)>;
> -defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
> - (outs GPR32wsp:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
> - (outs GPR32wsp:$Rd)>;
> -
> -let Defs = [NZCV] in {
> -defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
> - (outs GPR64:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
> - (outs GPR64:$Rd)>;
> -defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
> - (outs GPR32:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
> - (outs GPR32:$Rd)>;
> -defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
> - (outs GPR64:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
> - (outs GPR64:$Rd)>;
> -defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
> - (outs GPR32:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
> - (outs GPR32:$Rd)>;
> -
> -
> -let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in {
> -defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
> - (outs), extends_to_i64>,
> - addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
> -defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
> - (outs), extends_to_i32>,
> - addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
> -defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
> - (outs), extends_to_i64>,
> - addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
> -defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
> - (outs), extends_to_i32>,
> - addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
> -}
> -}
> -
> -// Now patterns for the operation without a shift being needed. No patterns are
> -// created for uxtx/sxtx since they're non-uniform and it's expected that
> -// add/sub (shifted register) will handle those cases anyway.
> -multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
> - extend_types exts> {
> - def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
> - (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
> - (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
> - (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
> -
> - def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
> - (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
> - (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
> - (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
> -}
> -
> -defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
> -defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
> -
> -defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
> -defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
> -
> -// An extend written as "lsl #imm" is valid if and only if one of Rn and Rd is
> -// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
> -// operation. Also permitted in this case is complete omission of the argument,
> -// which implies "lsl #0".
> -multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
> - RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
> - def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
> - (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
> -
> - def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
> - (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL),
> - 0>;
> -
> -}
> -
> -defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
> -defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
> -defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
> -defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
> -defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
> -defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
> -defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
> -defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
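
For reference, the surface syntax these aliases covered looks like this
(registers illustrative, from memory):

    add x0, sp, x1            // parsed as add x0, sp, x1, uxtx #0
    add sp, sp, x1, lsl #4    // "lsl" is only accepted because sp is involved
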
> -
> -// Rd cannot be sp for flag-setting variants so only half of the aliases are
> -// needed.
> -defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
> -defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
> -defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
> -defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
> -
> -// CMP unfortunately has to be different because the instruction doesn't have a
> -// dest register.
> -multiclass cmp_lsl_aliases<string asmop, Instruction inst,
> - RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
> - def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
> - (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
> -
> - def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
> - (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
> -}
> -
> -defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
> -defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
> -defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
> -defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
> -
> -// These instructions accept a 12-bit unsigned immediate, optionally shifted
> -// left by 12 bits. The official assembly format specifies a 12-bit immediate with
> -// one of "", "LSL #0", "LSL #12" supplementary operands.
> -
> -// There are surprisingly few ways to make this work with TableGen, so this
> -// implementation has separate instructions for the "LSL #0" and "LSL #12"
> -// variants.
> -
> -// If the MCInst retained a single combined immediate (which could be 0x123000,
> -// for example) then both components (imm & shift) would have to be delegated to
> -// a single assembly operand. This would entail a separate operand parser
> -// (because the LSL would have to live in the same AArch64Operand as the
> -// immediate to be accessible); assembly parsing is rather complex and
> -// error-prone C++ code.
> -//
> -// By splitting the immediate, we can delegate handling this optional operand to
> -// an InstAlias. Supporting functions to generate the correct MCInst are still
> -// required, but these are essentially trivial and parsing can remain generic.
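
To make the split concrete: an immediate such as 0x123000 only fits the
shifted form, so (operands illustrative):

    add x0, x1, #0x123, lsl #12    // MCInst stores imm12 = 0x123 plus the LSL12 operand
    add x0, x1, #0x123             // the plain LSL0 form
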
> -//
> -// Rejected plans with rationale:
> -// ------------------------------
> -//
> -// In an ideal world you'd have two first-class immediate operands (in
> -// InOperandList, specifying imm12 and shift). Unfortunately this is not
> -// selectable by any means I could discover.
> -//
> -// An Instruction with two MCOperands hidden behind a single entry in
> -// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
> -// but required more C++ code to handle encoding/decoding. Parsing (the intended
> -// main beneficiary) ended up equally complex because of the optional nature of
> -// "LSL #0".
> -//
> -// Attempting to circumvent the need for a custom OperandParser above by giving
> -// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
> -// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
> -// should be parsed: there was no way to accommodate an "lsl #12".
> -
> -let ParserMethod = "ParseImmWithLSLOperand",
> - RenderMethod = "addImmWithLSLOperands" in {
> -  // Derived PredicateMethod fields are different for each operand class.
> - def addsubimm_lsl0_asmoperand : AsmOperandClass {
> - let Name = "AddSubImmLSL0";
> -    // If an error is reported against this operand, the instruction could
> -    // also be a register variant.
> - let DiagnosticType = "AddSubSecondSource";
> - }
> -
> - def addsubimm_lsl12_asmoperand : AsmOperandClass {
> - let Name = "AddSubImmLSL12";
> - let DiagnosticType = "AddSubSecondSource";
> - }
> -}
> -
> -def shr_12_XFORM : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
> -}]>;
> -
> -def shr_12_neg_XFORM : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
> -}]>;
> -
> -def neg_XFORM : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
> -}]>;
> -
> -
> -multiclass addsub_imm_operands<ValueType ty> {
> - let PrintMethod = "printAddSubImmLSL0Operand",
> - EncoderMethod = "getAddSubImmOpValue",
> - ParserMatchClass = addsubimm_lsl0_asmoperand in {
> - def _posimm_lsl0 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
> - def _negimm_lsl0 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
> - neg_XFORM>;
> - }
> -
> - let PrintMethod = "printAddSubImmLSL12Operand",
> - EncoderMethod = "getAddSubImmOpValue",
> - ParserMatchClass = addsubimm_lsl12_asmoperand in {
> - def _posimm_lsl12 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
> - shr_12_XFORM>;
> -
> - def _negimm_lsl12 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
> - shr_12_neg_XFORM>;
> - }
> -}
> -
> -// The add operands don't need any transformation
> -defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
> -defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
> -
> -multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
> - string asmop, string cmpasmop,
> - Operand imm_operand, Operand cmp_imm_operand,
> - RegisterClass GPR, RegisterClass GPRsp,
> - AArch64Reg ZR, ValueType Ty> {
> - // All registers for non-S variants allow SP
> - def _s : A64I_addsubimm<sf, op, 0b0, shift,
> - (outs GPRsp:$Rd),
> - (ins GPRsp:$Rn, imm_operand:$Imm12),
> - !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
> - [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> -
> - // S variants can read SP but would write to ZR
> - def _S : A64I_addsubimm<sf, op, 0b1, shift,
> - (outs GPR:$Rd),
> - (ins GPRsp:$Rn, imm_operand:$Imm12),
> - !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
> - [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let Defs = [NZCV];
> - }
> -
> -  // Note that the pattern here for ADDS is subtle. Canonically CMP
> -  // a, b becomes SUBS a, b. If b is a negative immediate, this is
> -  // equivalent to ADDS a, (-b). That equivalence does not hold in
> -  // general (for b == 0, for instance, the carry flag comes out
> -  // differently), but the negimm operands below only match strictly
> -  // negative, encodable immediates, where it is sound.
> - def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
> - (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
> - !strconcat(cmpasmop, " $Rn, $Imm12"),
> - [(set NZCV,
> - (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP]> {
> - let Rd = 0b11111;
> - let Defs = [NZCV];
> - let isCompare = 1;
> - }
> -}
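
Concretely, a compare against a negative immediate should come out as the
cmn form, e.g. (registers illustrative):

    cmn w0, #5    // i.e. adds wzr, w0, #5; #-5 itself is not encodable in imm12
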
> -
> -
> -multiclass addsubimm_shifts<string prefix, bit sf, bit op,
> - string asmop, string cmpasmop, string operand, string cmpoperand,
> - RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
> - ValueType Ty> {
> - defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
> - asmop, cmpasmop,
> - !cast<Operand>(operand # "_lsl0"),
> - !cast<Operand>(cmpoperand # "_lsl0"),
> - GPR, GPRsp, ZR, Ty>;
> -
> - defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
> - asmop, cmpasmop,
> - !cast<Operand>(operand # "_lsl12"),
> - !cast<Operand>(cmpoperand # "_lsl12"),
> - GPR, GPRsp, ZR, Ty>;
> -}
> -
> -defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
> - "addsubimm_operand_i32_posimm",
> - "addsubimm_operand_i32_negimm",
> - GPR32, GPR32wsp, WZR, i32>;
> -defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
> - "addsubimm_operand_i64_posimm",
> - "addsubimm_operand_i64_negimm",
> - GPR64, GPR64xsp, XZR, i64>;
> -defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
> - "addsubimm_operand_i32_negimm",
> - "addsubimm_operand_i32_posimm",
> - GPR32, GPR32wsp, WZR, i32>;
> -defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
> - "addsubimm_operand_i64_negimm",
> - "addsubimm_operand_i64_posimm",
> - GPR64, GPR64xsp, XZR, i64>;
> -
> -multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
> - def _fromsp : InstAlias<"mov $Rd, $Rn",
> - (addop GPRsp:$Rd, SP:$Rn, 0),
> - 0b1>;
> -
> - def _tosp : InstAlias<"mov $Rd, $Rn",
> - (addop SP:$Rd, GPRsp:$Rn, 0),
> - 0b1>;
> -}
> -
> -// Recall Rxsp is a RegisterClass containing *just* xsp.
> -defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
> -defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
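
These are the usual sp moves, i.e.:

    mov sp, x0    // really add sp, x0, #0
    mov x0, sp    // really add x0, sp, #0
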
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (shifted register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
> -
> -//===-------------------------------
> -// 1. The "shifted register" operands. Shared with logical insts.
> -//===-------------------------------
> -
> -multiclass shift_operands<string prefix, string form> {
> - def _asmoperand_i32 : AsmOperandClass {
> - let Name = "Shift" # form # "i32";
> - let RenderMethod = "addShiftOperands";
> - let PredicateMethod = "isShift<A64SE::" # form # ", false>";
> - let DiagnosticType = "AddSubRegShift32";
> - }
> -
> - // Note that the operand type is intentionally i64 because the DAGCombiner
> - // puts these into a canonical form.
> - def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
> - let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
> - let DecoderMethod = "Decode32BitShiftOperand";
> - }
> -
> - def _asmoperand_i64 : AsmOperandClass {
> - let Name = "Shift" # form # "i64";
> - let RenderMethod = "addShiftOperands";
> - let PredicateMethod = "isShift<A64SE::" # form # ", true>";
> - let DiagnosticType = "AddSubRegShift64";
> - }
> -
> - def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
> - let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
> - }
> -}
> -
> -defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
> -defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
> -defm asr_operand : shift_operands<"asr_operand", "ASR">;
> -
> -// Not used for add/sub, but defined here for completeness. The "logical
> -// (shifted register)" instructions *do* have an ROR variant.
> -defm ror_operand : shift_operands<"ror_operand", "ROR">;
> -
> -//===-------------------------------
> -// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
> -//===-------------------------------
> -
> -// N.b. the commutable parameter is just !N. It will be first against the wall
> -// when the revolution comes.
> -multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
> - string asmop, SDPatternOperator opfrag, ValueType ty,
> - RegisterClass GPR, list<Register> defs> {
> - let isCommutable = commutable, Defs = defs in {
> - def _lsl : A64I_addsubshift<sf, op, s, 0b00,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def _lsr : A64I_addsubshift<sf, op, s, 0b01,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def _asr : A64I_addsubshift<sf, op, s, 0b10,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> - }
> -
> - def _noshift
> - : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
> - GPR:$Rm, 0)>;
> -
> - def : Pat<(opfrag ty:$Rn, ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
> -
> -multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
> - string asmop, SDPatternOperator opfrag,
> - list<Register> defs> {
> - defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
> - commutable, asmop, opfrag, i64, GPR64, defs>;
> - defm www : addsub_shifts<prefix # "www", 0b0, op, s,
> - commutable, asmop, opfrag, i32, GPR32, defs>;
> -}
> -
> -
> -defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
> -defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
> -
> -defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
> -defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
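
Typical selections from these patterns, for reference (registers
illustrative):

    add x0, x1, x2, lsl #3     // ADDxxx_lsl: x0 = x1 + (x2 << 3)
    subs w0, w1, w2, asr #7    // SUBSwww_asr, also setting NZCV
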
> -
> -//===-------------------------------
> -// 3. The NEG/NEGS aliases
> -//===-------------------------------
> -
> -multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
> - ValueType ty, Operand shift_operand, SDNode shiftop> {
> - def : InstAlias<"neg $Rd, $Rm, $Imm6",
> - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
> -
> - def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
> - (INST ZR, $Rm, shift_operand:$Imm6)>;
> -}
> -
> -defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
> -defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
> -defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
> -def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
> -def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
> -
> -defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
> -defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
> -defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
> -def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
> -def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
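
So a (sub 0, x) at the DAG level folds straight into the alias:

    neg x0, x1            // i.e. sub x0, xzr, x1
    neg w0, w1, lsl #2    // i.e. sub w0, wzr, w1, lsl #2
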
> -
> -// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
> -// be involved.
> -class negs_alias<Instruction INST, RegisterClass GPR,
> - Register ZR, Operand shift_operand, SDNode shiftop>
> - : InstAlias<"negs $Rd, $Rm, $Imm6",
> - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
> -
> -def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
> -def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
> -def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
> -def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
> -
> -def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
> -def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
> -def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
> -def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
> -
> -//===-------------------------------
> -// 4. The CMP/CMN aliases
> -//===-------------------------------
> -
> -multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
> - string asmop, SDPatternOperator opfrag, ValueType ty,
> - RegisterClass GPR> {
> - let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
> - def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
> - [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> -
> - def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
> - [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> -
> - def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
> - [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> - }
> -
> - def _noshift
> - : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
> -
> - def : Pat<(opfrag ty:$Rn, ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
> -
> -defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
> -defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
> -
> -defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
> -defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (with carry) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
> -
> -multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
> - let Uses = [NZCV] in {
> - def www : A64I_addsubcarry<0b0, op, s, 0b000000,
> - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
> - (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -}
> -
> -let isCommutable = 1 in {
> - defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
> -}
> -
> -defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
> -
> -let Defs = [NZCV] in {
> - let isCommutable = 1 in {
> - defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
> - }
> -
> - defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
> -}
> -
> -def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
> -def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
> -def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
> -def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
> -
> -// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
> -// addition). So the flag-setting instructions are appropriate.
> -def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
> -def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
> -def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
> -def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Bitfield
> -//===----------------------------------------------------------------------===//
> -// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
> -// UBFIZ, UBFX
> -
> -// Because of the rather complicated nearly-overlapping aliases, the decoding of
> -// this range of instructions is handled manually. The architectural
> -// instructions are BFM, SBFM and UBFM but a disassembler should never produce
> -// these.
> -//
> -// In the end, the best option was to use BFM instructions for decoding under
> -// almost all circumstances, but to create aliasing *Instructions* for each of
> -// the canonical forms and specify a completely custom decoder which would
> -// substitute the correct MCInst as needed.
> -//
> -// This also simplifies instruction selection, parsing, etc., because the MCInsts
> -// have a shape that's closer to their use in code.
> -
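
A worked example of the aliasing being described, if it helps:

    lsr w0, w1, #3    // architecturally ubfm w0, w1, #3, #31

The custom decoder is what lets the disassembler hand back the LSR form
directly instead of a raw UBFM.
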
> -//===-------------------------------
> -// 1. The architectural BFM instructions
> -//===-------------------------------
> -
> -def uimm5_asmoperand : AsmOperandClass {
> - let Name = "UImm5";
> - let PredicateMethod = "isUImm<5>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm5";
> -}
> -
> -def uimm6_asmoperand : AsmOperandClass {
> - let Name = "UImm6";
> - let PredicateMethod = "isUImm<6>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm6";
> -}
> -
> -def bitfield32_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
> - let ParserMatchClass = uimm5_asmoperand;
> -
> - let DecoderMethod = "DecodeBitfield32ImmOperand";
> -}
> -
> -
> -def bitfield64_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
> - let ParserMatchClass = uimm6_asmoperand;
> -
> - // Default decoder works in 64-bit case: the 6-bit field can take any value.
> -}
> -
> -multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
> - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - }
> -
> - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - }
> -}
> -
> -defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
> -defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
> -
> -// BFM instructions modify the destination register rather than defining it
> -// completely.
> -def BFMwwii :
> - A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
> - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - let Constraints = "$src = $Rd";
> -}
> -
> -def BFMxxii :
> - A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
> - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - let Constraints = "$src = $Rd";
> -}
> -
> -
> -//===-------------------------------
> -// 2. Extend aliases to 64-bit dest
> -//===-------------------------------
> -
> -// Unfortunately the extensions that end up as 64 bits cannot be handled by an
> -// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
> -// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is
> -// not capable of such a mapping as far as I'm aware.
> -
> -// Note that these instructions are strictly more specific than the
> -// BFM ones (in ImmR) so they can handle their own decoding.
> -class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
> - string asmop, bits<6> imms, dag pattern>
> - : A64I_bitfield<sf, opc, sf,
> - (outs GPRDest:$Rd), (ins GPR32:$Rn),
> - !strconcat(asmop, "\t$Rd, $Rn"),
> - [(set dty:$Rd, pattern)], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let ImmR = 0b000000;
> - let ImmS = imms;
> -}
> -
> -// Signed extensions
> -def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
> - (sext_inreg (anyext i32:$Rn), i8)>;
> -def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
> - (sext_inreg i32:$Rn, i8)>;
> -def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
> - (sext_inreg (anyext i32:$Rn), i16)>;
> -def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
> - (sext_inreg i32:$Rn, i16)>;
> -def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
> -
> -// Unsigned extensions
> -def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
> - (and i32:$Rn, 255)>;
> -def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
> - (and i32:$Rn, 65535)>;
> -
> -// The 64-bit unsigned variants are not strictly architectural but recommended
> -// for consistency.
> -let isAsmParserOnly = 1 in {
> - def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
> - (and (anyext i32:$Rn), 255)>;
> - def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
> - (and (anyext i32:$Rn), 65535)>;
> -}
> -
> -// Extra patterns for when the source register is actually 64-bits
> -// too. There's no architectural difference here, it's just LLVM
> -// shenanigans. There's no need for equivalent zero-extension patterns
> -// because they'll already be caught by logical (immediate) matching.
> -def : Pat<(sext_inreg i64:$Rn, i8),
> - (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
> -def : Pat<(sext_inreg i64:$Rn, i16),
> - (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
> -def : Pat<(sext_inreg i64:$Rn, i32),
> - (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
> -
> -
> -//===-------------------------------
> -// 3. Aliases for ASR and LSR (the simple shifts)
> -//===-------------------------------
> -
> -// These also handle their own decoding because ImmS being set makes
> -// them take precedence over BFM.
> -multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
> - def wwi : A64I_bitfield<0b0, opc, 0b0,
> - (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
> - [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let ImmS = 31;
> - }
> -
> - def xxi : A64I_bitfield<0b1, opc, 0b1,
> - (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
> - [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let ImmS = 63;
> - }
> -
> -}
> -
> -defm ASR : A64I_shift<0b00, "asr", sra>;
> -defm LSR : A64I_shift<0b10, "lsr", srl>;
> -
> -//===-------------------------------
> -// 4. Aliases for LSL
> -//===-------------------------------
> -
> -// Unfortunately LSL and subsequent aliases are much more complicated. We need
> -// to be able to say that certain output instruction fields depend in a complex
> -// manner on combinations of input assembly fields.
> -//
> -// MIOperandInfo *might* have been able to do it, but at the cost of
> -// significantly more C++ code.
> -
> -// N.b. contrary to usual practice these operands store the shift rather than
> -// the machine bits in an MCInst. The complexity overhead of consistency
> -// outweighed the benefits in this case (custom asmparser, printer and selection
> -// vs custom encoder).
> -def bitfield32_lsl_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
> - let ParserMatchClass = uimm5_asmoperand;
> - let EncoderMethod = "getBitfield32LSLOpValue";
> -}
> -
> -def bitfield64_lsl_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
> - let ParserMatchClass = uimm6_asmoperand;
> - let EncoderMethod = "getBitfield64LSLOpValue";
> -}
> -
> -class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
> - Operand operand>
> - : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
> - "lsl\t$Rd, $Rn, $FullImm",
> - [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - bits<12> FullImm;
> - let ImmR = FullImm{5-0};
> - let ImmS = FullImm{11-6};
> -
> - // No disassembler allowed because it would overlap with BFM which does the
> - // actual work.
> - let isAsmParserOnly = 1;
> -}
> -
> -def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
> -def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
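
A worked encoding, to make the FullImm split concrete (assuming I have the
formula right):

    lsl x0, x1, #3    // i.e. ubfm x0, x1, #61, #60: ImmR = (64 - 3) mod 64, ImmS = 63 - 3
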
> -
> -//===-------------------------------
> -// 5. Aliases for bitfield extract instructions
> -//===-------------------------------
> -
> -def bfx32_width_asmoperand : AsmOperandClass {
> - let Name = "BFX32Width";
> - let PredicateMethod = "isBitfieldWidth<32>";
> - let RenderMethod = "addBFXWidthOperands";
> - let DiagnosticType = "Width32";
> -}
> -
> -def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
> - let PrintMethod = "printBFXWidthOperand";
> - let ParserMatchClass = bfx32_width_asmoperand;
> -}
> -
> -def bfx64_width_asmoperand : AsmOperandClass {
> - let Name = "BFX64Width";
> - let PredicateMethod = "isBitfieldWidth<64>";
> - let RenderMethod = "addBFXWidthOperands";
> - let DiagnosticType = "Width64";
> -}
> -
> -def bfx64_width : Operand<i64> {
> - let PrintMethod = "printBFXWidthOperand";
> - let ParserMatchClass = bfx64_width_asmoperand;
> -}
> -
> -
> -multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
> - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -
> - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -}
> -
> -defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
> -defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
> -
> -// Again, variants based on BFM modify Rd so need it as an input too.
> -def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
> - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
> - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -// SBFX instructions can do a 1-instruction sign-extension of boolean values.
> -def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
> -def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
> -def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
> - (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
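
In assembly terms the i32 pattern there is just, I believe:

    sbfx w0, w0, #0, #1    // replicate bit 0 across the whole register
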
> -
> -// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
> -// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
> -def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
> - sub_32)>;
> -
> -//===-------------------------------
> -// 6. Aliases for bitfield insert instructions
> -//===-------------------------------
> -
> -def bfi32_lsb_asmoperand : AsmOperandClass {
> - let Name = "BFI32LSB";
> - let PredicateMethod = "isUImm<5>";
> - let RenderMethod = "addBFILSBOperands<32>";
> - let DiagnosticType = "UImm5";
> -}
> -
> -def bfi32_lsb : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
> - let PrintMethod = "printBFILSBOperand<32>";
> - let ParserMatchClass = bfi32_lsb_asmoperand;
> -}
> -
> -def bfi64_lsb_asmoperand : AsmOperandClass {
> - let Name = "BFI64LSB";
> - let PredicateMethod = "isUImm<6>";
> - let RenderMethod = "addBFILSBOperands<64>";
> - let DiagnosticType = "UImm6";
> -}
> -
> -def bfi64_lsb : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
> - let PrintMethod = "printBFILSBOperand<64>";
> - let ParserMatchClass = bfi64_lsb_asmoperand;
> -}
> -
> -// Width verification is performed during conversion so the width operand can be
> -// shared between the 32/64-bit cases. It is still needed for the print method,
> -// though, because ImmS encodes "width - 1".
> -def bfi32_width_asmoperand : AsmOperandClass {
> - let Name = "BFI32Width";
> - let PredicateMethod = "isBitfieldWidth<32>";
> - let RenderMethod = "addBFIWidthOperands";
> - let DiagnosticType = "Width32";
> -}
> -
> -def bfi32_width : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
> - let PrintMethod = "printBFIWidthOperand";
> - let ParserMatchClass = bfi32_width_asmoperand;
> -}
> -
> -def bfi64_width_asmoperand : AsmOperandClass {
> - let Name = "BFI64Width";
> - let PredicateMethod = "isBitfieldWidth<64>";
> - let RenderMethod = "addBFIWidthOperands";
> - let DiagnosticType = "Width64";
> -}
> -
> -def bfi64_width : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
> - let PrintMethod = "printBFIWidthOperand";
> - let ParserMatchClass = bfi64_width_asmoperand;
> -}
> -
> -multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
> - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -
> - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -}
> -
> -defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
> -defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
> -
> -
> -def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
> - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
> - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Compare and branch (immediate)
> -//===----------------------------------------------------------------------===//
> -// Contains: CBZ, CBNZ
> -
> -class label_asmoperand<int width, int scale> : AsmOperandClass {
> - let Name = "Label" # width # "_" # scale;
> - let PredicateMethod = "isLabel<" # width # "," # scale # ">";
> - let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
> - let DiagnosticType = "Label";
> -}
> -
> -def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
> -
> -// All conditional immediate branches are the same really: 19 signed bits scaled
> -// by the instruction-size (4).
> -def bcc_target : Operand<OtherVT> {
> - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
> - let ParserMatchClass = label_wid19_scal4_asmoperand;
> - let PrintMethod = "printLabelOperand<19, 4>";
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
> - let isBranch = 1, isTerminator = 1 in {
> - def x : A64I_cmpbr<0b1, op,
> - (outs),
> - (ins GPR64:$Rt, bcc_target:$Label),
> - !strconcat(asmop,"\t$Rt, $Label"),
> - [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr, ReadBr]>;
> -
> - def w : A64I_cmpbr<0b0, op,
> - (outs),
> - (ins GPR32:$Rt, bcc_target:$Label),
> - !strconcat(asmop,"\t$Rt, $Label"),
> - [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr, ReadBr]>;
> - }
> -}
> -
> -defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
> - return Imm == A64CC::EQ;
> -}]> >;
> -defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
> - return Imm == A64CC::NE;
> -}]> >;
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional branch (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: B.cc
> -
> -def cond_code_asmoperand : AsmOperandClass {
> - let Name = "CondCode";
> - let DiagnosticType = "CondCode";
> -}
> -
> -def cond_code : Operand<i32>, ImmLeaf<i32, [{
> - return Imm >= 0 && Imm <= 15;
> -}]> {
> - let PrintMethod = "printCondCodeOperand";
> - let ParserMatchClass = cond_code_asmoperand;
> -}
> -
> -def Bcc : A64I_condbr<0b0, 0b0, (outs),
> - (ins cond_code:$Cond, bcc_target:$Label),
> - "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]> {
> - let Uses = [NZCV];
> - let isBranch = 1;
> - let isTerminator = 1;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional compare (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CCMN, CCMP
> -
> -def uimm4_asmoperand : AsmOperandClass {
> - let Name = "UImm4";
> - let PredicateMethod = "isUImm<4>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm4";
> -}
> -
> -def uimm4 : Operand<i32> {
> - let ParserMatchClass = uimm4_asmoperand;
> -}
> -
> -def uimm5 : Operand<i32> {
> - let ParserMatchClass = uimm5_asmoperand;
> -}
> -
> -// The only difference between this operand and the one for instructions like
> -// B.cc is that it's parsed manually. The others get parsed implicitly as part of
> -// the mnemonic handling.
> -def cond_code_op_asmoperand : AsmOperandClass {
> - let Name = "CondCodeOp";
> - let RenderMethod = "addCondCodeOperands";
> - let PredicateMethod = "isCondCode";
> - let ParserMethod = "ParseCondCodeOperand";
> - let DiagnosticType = "CondCode";
> -}
> -
> -def cond_code_op : Operand<i32> {
> - let PrintMethod = "printCondCodeOperand";
> - let ParserMatchClass = cond_code_op_asmoperand;
> -}
> -
> -class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
> - : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
> - (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
> - [], NoItinerary>,
> - Sched<[WriteCMP, ReadCMP]> {
> - let Defs = [NZCV];
> -}
> -
> -def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
> -def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
> -def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
> -def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional compare (register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CCMN, CCMP
> -
> -class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
> - : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
> - [], NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]> {
> - let Defs = [NZCV];
> -}
> -
> -def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
> -def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
> -def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
> -def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional select instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
> -
> -// Condition code which is encoded as the inversion (semantically rather than
> -// bitwise) in the instruction.
> -def inv_cond_code_op_asmoperand : AsmOperandClass {
> - let Name = "InvCondCodeOp";
> - let RenderMethod = "addInvCondCodeOperands";
> - let PredicateMethod = "isCondCode";
> - let ParserMethod = "ParseCondCodeOperand";
> - let DiagnosticType = "CondCode";
> -}
> -
> -def inv_cond_code_op : Operand<i32> {
> - let ParserMatchClass = inv_cond_code_op_asmoperand;
> - let PrintMethod = "printInverseCondCodeOperand";
> -}
> -
> -// Having a separate operand for the selectable use-case is debatable, but gives
> -// consistency with cond_code.
> -def inv_cond_XFORM : SDNodeXForm<imm, [{
> - A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
> - return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
> -}]>;
> -
> -def inv_cond_code
> - : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
> -
> -
> -multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
> - SDPatternOperator select> {
> - let Uses = [NZCV] in {
> - def wwwc : A64I_condsel<0b0, op, 0b0, op2,
> - (outs GPR32:$Rd),
> - (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
> - [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> -
> -
> - def xxxc : A64I_condsel<0b1, op, 0b0, op2,
> - (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
> - [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> - }
> -}
> -
> -def simple_select
> - : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
> -
> -class complex_select<SDPatternOperator opnode>
> - : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
> -
> -
> -defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
> -defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
> - complex_select<PatFrag<(ops node:$val),
> - (add node:$val, 1)>>>;
> -defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
> -defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
> -
> -// Now the instruction aliases, which fit nicely into LLVM's model:
> -
> -def : InstAlias<"cset $Rd, $Cond",
> - (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cset $Rd, $Cond",
> - (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"csetm $Rd, $Cond",
> - (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"csetm $Rd, $Cond",
> - (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinc $Rd, $Rn, $Cond",
> - (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinc $Rd, $Rn, $Cond",
> - (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinv $Rd, $Rn, $Cond",
> - (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinv $Rd, $Rn, $Cond",
> - (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cneg $Rd, $Rn, $Cond",
> - (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cneg $Rd, $Rn, $Cond",
> - (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
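
For example (conditions illustrative), note the semantic inversion baked
into the encoding:

    cset w0, eq        // i.e. csinc w0, wzr, wzr, ne
    cneg x0, x1, lt    // i.e. csneg x0, x1, x1, ge
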
> -
> -// Finally some helper patterns.
> -
> -// For CSET (a.k.a. zero-extension of icmp)
> -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
> - (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
> - (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
> -
> -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
> - (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
> - (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
> -
> -// For CSETM (a.k.a. sign-extension of icmp)
> -def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
> - (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
> - (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
> -
> -def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
> - (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
> - (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
> -
> -// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
> -// commutativity. The instructions are too complex for isCommutable to be used,
> -// so we have to create the patterns manually:
> -
> -// No commutable pattern for CSEL since the commuted version is isomorphic.
> -
> -// CSINC
> -def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
> - (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
> -def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
> - (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
> -
> -// CSINV
> -def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
> - (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
> -def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
> - (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
> -
> -// CSNEG
> -def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
> - (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
> -def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
> - (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Data Processing (1 source) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
> -
> -// We define a unary operator which always fails. We will use this to
> -// define unary operators that cannot be matched.
> -
> -class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
> - list<dag> patterns, RegisterClass GPRrc,
> - InstrItinClass itin>:
> - A64I_dp_1src<sf,
> - 0,
> - 0b00000,
> - opcode,
> - !strconcat(asmop, "\t$Rd, $Rn"),
> - (outs GPRrc:$Rd),
> - (ins GPRrc:$Rn),
> - patterns,
> - itin>,
> - Sched<[WriteALU, ReadALU]>;
> -
> -multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
> - let hasSideEffects = 0 in {
> - def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
> - def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
> - }
> -}
> -
> -defm RBIT : A64I_dp_1src<0b000000, "rbit">;
> -defm CLS : A64I_dp_1src<0b000101, "cls">;
> -defm CLZ : A64I_dp_1src<0b000100, "clz">;
> -
> -def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
> -def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
> -def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
> -def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
> -
> -def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
> -def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
> -def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
> -def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
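
i.e. cttz is synthesized as a bit-reverse followed by count-leading-zeros:

    rbit w0, w0
    clz  w0, w0
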
> -
> -
> -def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
> - [(set i32:$Rd, (bswap i32:$Rn))],
> - GPR32, NoItinerary>;
> -def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
> - [(set i64:$Rd, (bswap i64:$Rn))],
> - GPR64, NoItinerary>;
> -def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
> - [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
> - GPR64, NoItinerary>;
> -def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
> - [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
> - GPR32,
> - NoItinerary>;
> -def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
> -
> -//===----------------------------------------------------------------------===//
> -// Data Processing (2 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
> -// LSR, ASR, ROR
> -
> -
> -class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
> - RegisterClass GPRsp,
> - InstrItinClass itin>:
> - A64I_dp_2src<sf,
> - opcode,
> - 0,
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
> - (outs GPRsp:$Rd),
> - (ins GPRsp:$Rn, GPRsp:$Rm),
> - patterns,
> - itin>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -multiclass dp_2src_crc<bit c, string asmop> {
> - def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
> - !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
> - def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
> - !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
> - def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
> - !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
> - def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
> - !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
> - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
> - def www : dp_2src_impl<0b0,
> - opcode,
> - asmop,
> - [(set i32:$Rd,
> - (op i32:$Rn, (i64 (zext i32:$Rm))))],
> - GPR32,
> - NoItinerary>;
> - def xxx : dp_2src_impl<0b1,
> - opcode,
> - asmop,
> - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
> - GPR64,
> - NoItinerary>;
> -}
> -
> -
> -multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
> - def www : dp_2src_impl<0b0,
> - opcode,
> - asmop,
> - [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
> - GPR32,
> - NoItinerary>;
> - def xxx : dp_2src_impl<0b1,
> - opcode,
> - asmop,
> - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
> - GPR64,
> - NoItinerary>;
> -}
> -
> -// Here we define the data processing 2 source instructions.
> -defm CRC32 : dp_2src_crc<0b0, "crc32">;
> -defm CRC32C : dp_2src_crc<0b1, "crc32c">;
> -
> -let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in {
> - defm UDIV : dp_2src<0b000010, "udiv", udiv>;
> - defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
> -}
> -
> -let SchedRW = [WriteALUs, ReadALU, ReadALU] in {
> - defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
> - defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
> - defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
> - defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
> -}
> -
> -// Extra patterns for an incoming 64-bit value for a 32-bit
> -// operation. Since the LLVM operations are undefined (as in C) if the
> -// RHS is out of range, it's perfectly permissible to discard the high
> -// bits of the GPR64.
> -def : Pat<(shl i32:$Rn, i64:$Rm),
> - (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
> -def : Pat<(srl i32:$Rn, i64:$Rm),
> - (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
> -def : Pat<(sra i32:$Rn, i64:$Rm),
> - (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
> -def : Pat<(rotr i32:$Rn, i64:$Rm),
> - (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
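
e.g. an i32 shift by an i64 amount should come out as just (registers
illustrative):

    lsr w0, w0, w1    // w1 is the low half of the amount; the high bits never mattered
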
> -
> -// Here we define the aliases for the data processing 2 source instructions.
> -def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
> -def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
> -def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
> -def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
> -
> -//===----------------------------------------------------------------------===//
> -// Data Processing (3 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
> -// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
> -
> -class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
> - ValueType AccTy, RegisterClass SrcReg,
> - string asmop, dag pattern>
> - : A64I_dp3<sf, opcode,
> - (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
> - [(set AccTy:$Rd, pattern)], NoItinerary>,
> - Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> {
> - bits<5> Ra;
> - let Inst{14-10} = Ra;
> -
> - RegisterClass AccGPR = AccReg;
> - RegisterClass SrcGPR = SrcReg;
> -}
> -
> -def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
> - (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
> -def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
> - (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
> -
> -def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
> - (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
> -def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
> - (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
> -
> -def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
> - (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
> -def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
> - (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
> -
> -def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
> - (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
> -def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
> - (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
> -
> -let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
> - def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm),
> - "umulh\t$Rd, $Rn, $Rm",
> - [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteMAC, ReadMAC, ReadMAC]>;
> -
> - def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm),
> - "smulh\t$Rd, $Rn, $Rm",
> - [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteMAC, ReadMAC, ReadMAC]>;
> -}
> -
> -multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
> - Register ZR, dag pattern> {
> - def : InstAlias<asmop # " $Rd, $Rn, $Rm",
> - (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
> -
> - def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
> -}
> -
> -defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
> -defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
> -
> -defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
> - (sub 0, (mul i32:$Rn, i32:$Rm))>;
> -defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
> - (sub 0, (mul i64:$Rn, i64:$Rm))>;
> -
> -defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
> - (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
> -defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
> - (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
> -
> -defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
> - (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
> -defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
> - (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
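To make the alias scheme above concrete: mul/mneg/smull/smnegl/umull/umnegl
are just the four-operand instructions with the zero register as the
accumulator. A rough C rendering (a sketch, not the backend's mechanism):

  #include <stdint.h>

  static uint32_t madd32(uint32_t rn, uint32_t rm, uint32_t ra) {
    return ra + rn * rm;                 /* madd w0, w1, w2, w3 */
  }
  static uint32_t mul32(uint32_t rn, uint32_t rm) {
    return madd32(rn, rm, 0);            /* mul w0,w1,w2 == madd w0,w1,w2,wzr */
  }
  static int64_t smull64(int32_t rn, int32_t rm) {
    return (int64_t)rn * (int64_t)rm;    /* smull x0,w1,w2 == smaddl ...,xzr */
  }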
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Exception generation
> -//===----------------------------------------------------------------------===//
> -// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
> -
> -def uimm16_asmoperand : AsmOperandClass {
> - let Name = "UImm16";
> - let PredicateMethod = "isUImm<16>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm16";
> -}
> -
> -def uimm16 : Operand<i32> {
> - let ParserMatchClass = uimm16_asmoperand;
> -}
> -
> -class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
> - : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
> - !strconcat(asmop, "\t$UImm16"), [], NoItinerary>,
> - Sched<[WriteBr]> {
> - let isBranch = 1;
> - let isTerminator = 1;
> -}
> -
> -def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
> -def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
> -def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
> -def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
> -def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
> -
> -def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
> -def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
> -def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
> -
> -// The immediate is optional for the DCPS instructions, defaulting to 0.
> -def : InstAlias<"dcps1", (DCPS1i 0)>;
> -def : InstAlias<"dcps2", (DCPS2i 0)>;
> -def : InstAlias<"dcps3", (DCPS3i 0)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Extract (immediate)
> -//===----------------------------------------------------------------------===//
> -// Contains: EXTR + alias ROR
> -
> -def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
> - (outs GPR32:$Rd),
> - (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
> - "extr\t$Rd, $Rn, $Rm, $LSB",
> - [(set i32:$Rd,
> - (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
> - (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
> - "extr\t$Rd, $Rn, $Rm, $LSB",
> - [(set i64:$Rd,
> - (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -def : InstAlias<"ror $Rd, $Rs, $LSB",
> - (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
> -def : InstAlias<"ror $Rd, $Rs, $LSB",
> - (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
> -
> -def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
> - (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
> -def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
> - (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
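The ROR alias and the rotr patterns are the same trick: a rotate is an EXTR
whose two source registers coincide. In C terms (sketch; lsb restricted to
1..31 here so both shifts stay defined):

  #include <stdint.h>

  static uint32_t extr32(uint32_t hi, uint32_t lo, unsigned lsb) {
    return (lo >> lsb) | (hi << (32 - lsb));
  }
  static uint32_t rotr32(uint32_t x, unsigned n) {
    return extr32(x, x, n);              /* ror w0,w1,#n == extr w0,w1,w1,#n */
  }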
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point compare instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCMP, FCMPE
> -
> -def fpzero_asmoperand : AsmOperandClass {
> - let Name = "FPZero";
> - let ParserMethod = "ParseFPImmOperand";
> - let DiagnosticType = "FPZero";
> -}
> -
> -def fpz32 : Operand<f32>,
> - ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
> - let ParserMatchClass = fpzero_asmoperand;
> - let PrintMethod = "printFPZeroOperand";
> - let DecoderMethod = "DecodeFPZeroOperand";
> -}
> -
> -def fpz64 : Operand<f64>,
> - ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
> - let ParserMatchClass = fpzero_asmoperand;
> - let PrintMethod = "printFPZeroOperand";
> - let DecoderMethod = "DecodeFPZeroOperand";
> -}
> -
> -def fpz64movi : Operand<i64>,
> - ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
> - let ParserMatchClass = fpzero_asmoperand;
> - let PrintMethod = "printFPZeroOperand";
> - let DecoderMethod = "DecodeFPZeroOperand";
> -}
> -
> -multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
> - def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
> - (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
> - let Defs = [NZCV];
> - }
> -
> - def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
> - (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
> - let Defs = [NZCV];
> - }
> -}
> -
> -defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
> - (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
> -defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
> - (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
> -
> -// In the immediate forms, what would be Rm must be written as #0.0; note
> -// that even though it's called "$Rm" here to fit in with the InstrFormats,
> -// it's actually an immediate.
> -defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
> - (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
> -
> -defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
> - (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point conditional compare instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCCMP, FCCMPE
> -
> -class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
> - : A64I_fpccmp<0b0, 0b0, type, op,
> - (outs),
> - (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
> - [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
> - let Defs = [NZCV];
> -}
> -
> -def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
> -def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
> -def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
> -def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point conditional select instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCSEL
> -
> -let Uses = [NZCV] in {
> - def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
> - (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
> - "fcsel\t$Rd, $Rn, $Rm, $Cond",
> - [(set f32:$Rd,
> - (simple_select f32:$Rn, f32:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -
> -
> - def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
> - (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
> - "fcsel\t$Rd, $Rn, $Rm, $Cond",
> - [(set f64:$Rd,
> - (simple_select f64:$Rn, f64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point data-processing (1 source)
> -//===----------------------------------------------------------------------===//
> -// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
> -
> -def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
> - [{ (void)N; return false; }]>;
> -
> -// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
> -// syntax. Default to no pattern because most are odd enough not to have one.
> -multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
> - SDPatternOperator opnode = FPNoUnop> {
> - def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
> - !strconcat(asmstr, "\t$Rd, $Rn"),
> - [(set f32:$Rd, (opnode f32:$Rn))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> - def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
> - !strconcat(asmstr, "\t$Rd, $Rn"),
> - [(set f64:$Rd, (opnode f64:$Rn))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -}
> -
> -defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
> -defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
> -defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
> -let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
> - defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
> -}
> -
> -defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
> -defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
> -defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
> -defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
> -defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
> -defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
> -defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
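Reading the mapping off the defm lines above: frintp/frintm/frintz/frintx/
frinti correspond to the ceil/floor/trunc/rint/nearbyint rounding flavours
(frintn and frinta get no pattern here). A C-level sketch of those flavours:

  #include <math.h>

  static double frint_flavours(double x) {
    return ceil(x)        /* frintp: towards +inf */
         + floor(x)       /* frintm: towards -inf */
         + trunc(x)       /* frintz: towards zero */
         + rint(x)        /* frintx: current mode, may raise inexact */
         + nearbyint(x);  /* frinti: current mode, no inexact */
  }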
> -
> -// The FCVT instructions have different source and destination register-types,
> -// but the fields are uniform everywhere a D-register (say) crops up. Package
> -// this information in a Record.
> -class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
> - RegisterClass Class = rc;
> - ValueType VT = vt;
> - bit t1 = fld{1};
> - bit t0 = fld{0};
> -}
> -
> -def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
> -def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
> -def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
> -
> -class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
> - : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
> - {0,0,0,1, DestReg.t1, DestReg.t0},
> - (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
> - "fcvt\t$Rd, $Rn",
> - [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
> -def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
> -def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
> -def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
> -def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
> -def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point data-processing (2 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
> -
> -def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
> - [{ (void)N; return false; }]>;
> -
> -multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
> - SDPatternOperator opnode> {
> - def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
> - (outs FPR32:$Rd),
> - (ins FPR32:$Rn, FPR32:$Rm),
> - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
> - [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -
> - def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
> - (outs FPR64:$Rd),
> - (ins FPR64:$Rn, FPR64:$Rm),
> - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
> - [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -}
> -
> -let isCommutable = 1 in {
> - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
> - defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
> - }
> - defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
> -
> - // No patterns for these.
> - defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
> - defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
> - defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
> - defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
> -
> - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
> - defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (fneg (fmul node:$lhs, node:$rhs))> >;
> - }
> -}
> -
> -let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in {
> - defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
> -}
> -defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point data-processing (3 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FMADD, FMSUB, FNMADD, FNMSUB
> -
> -def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
> - (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
> -def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
> - (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
> -def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
> - (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
> -
> -class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
> - bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
> - : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
> - (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
> - !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
> - [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
> - NoItinerary>,
> - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>;
> -
> -def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
> -def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
> -def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
> -def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
> -
> -def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
> -def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
> -def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
> -def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
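The three PatFrags above just shuffle negations around a fused multiply-add;
in terms of C99 fma() (a single rounding step, which is the point):

  #include <math.h>

  static double fmadd_c (double n, double m, double a) { return fma( n, m,  a); }
  static double fmsub_c (double n, double m, double a) { return fma(-n, m,  a); }
  static double fnmadd_c(double n, double m, double a) { return fma(-n, m, -a); }
  static double fnmsub_c(double n, double m, double a) { return fma( n, m, -a); }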
> -
> -// Extra patterns for when we're allowed to optimise separate multiplication and
> -// addition.
> -let Predicates = [HasFPARMv8, UseFusedMAC] in {
> -def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
> - (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
> - (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
> - (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -def : Pat<(f32 (fsub (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
> - (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -
> -def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
> - (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
> - (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
> - (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -def : Pat<(f64 (fsub (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
> - (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -}
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point <-> fixed-point conversion instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
> -
> -// #1-#32 allowed, encoded as "64 - <specified imm>".
> -def fixedpos_asmoperand_i32 : AsmOperandClass {
> - let Name = "CVTFixedPos32";
> - let RenderMethod = "addCVTFixedPosOperands";
> - let PredicateMethod = "isCVTFixedPos<32>";
> - let DiagnosticType = "CVTFixedPos32";
> -}
> -
> -// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
> -def fixedpos_asmoperand_i64 : AsmOperandClass {
> - let Name = "CVTFixedPos64";
> - let RenderMethod = "addCVTFixedPosOperands";
> - let PredicateMethod = "isCVTFixedPos<64>";
> - let DiagnosticType = "CVTFixedPos64";
> -}
> -
> -// We need the cartesian product of the f32/f64 and i32/i64 operands for
> -// conversions:
> -// + Selection needs to use operands of the correct floating type
> -// + Assembly parsing and decoding depend on integer width
> -class cvtfix_i32_op<ValueType FloatVT>
> - : Operand<FloatVT>,
> - ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
> - let ParserMatchClass = fixedpos_asmoperand_i32;
> - let DecoderMethod = "DecodeCVT32FixedPosOperand";
> - let PrintMethod = "printCVTFixedPosOperand";
> -}
> -
> -class cvtfix_i64_op<ValueType FloatVT>
> - : Operand<FloatVT>,
> - ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
> - let ParserMatchClass = fixedpos_asmoperand_i64;
> - let PrintMethod = "printCVTFixedPosOperand";
> -}
> -
> -// Because of the proliferation of weird operands, it's not really
> -// worth going for a multiclass here. Oh well.
> -
> -class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
> - RegisterClass GPR, RegisterClass FPR,
> - ValueType DstTy, ValueType SrcTy,
> - Operand scale_op, string asmop, SDNode cvtop>
> - : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
> - (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
> - !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
> - [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
> - cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
> -def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
> - cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
> -def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
> - cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
> -def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
> - cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
> -
> -def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
> - cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
> -def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
> - cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
> -def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
> - cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
> -def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
> - cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
> -
> -
> -class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
> - RegisterClass FPR, RegisterClass GPR,
> - ValueType DstTy, ValueType SrcTy,
> - Operand scale_op, string asmop, SDNode cvtop>
> - : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
> - (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
> - !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
> - [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
> - cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
> -def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
> - cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
> -def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
> - cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
> -def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
> - cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
> -def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
> - cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
> -def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
> - cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
> -def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
> - cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
> -def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
> - cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
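Reading the patterns above: fcvtzs with scale #s selects from
fp_to_sint(x * 2^s), and scvtf with #s gives sint_to_fp(v) / 2^s. A C sketch
of that semantics (s kept below 32 so the shift is defined):

  #include <stdint.h>

  static int32_t fcvtzs_fix(float x, unsigned s) {
    return (int32_t)(x * (float)(1u << s));
  }
  static float scvtf_fix(int32_t v, unsigned s) {
    return (float)v / (float)(1u << s);
  }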
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point <-> integer conversion instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
> -
> -class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
> - RegisterClass DestPR, RegisterClass SrcPR, string asmop>
> - : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
> - !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
> - def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
> - GPR32, FPR32, asmop # "s">;
> - def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
> - GPR64, FPR32, asmop # "s">;
> - def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
> - GPR32, FPR32, asmop # "u">;
> - def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
> - GPR64, FPR32, asmop # "u">;
> -
> - def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
> - GPR32, FPR64, asmop # "s">;
> - def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
> - GPR64, FPR64, asmop # "s">;
> - def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
> - GPR32, FPR64, asmop # "u">;
> - def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
> - GPR64, FPR64, asmop # "u">;
> -}
> -
> -defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
> -defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
> -defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
> -defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
> -defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
> -
> -let Predicates = [HasFPARMv8] in {
> -def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
> -def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
> -def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
> -def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
> -def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
> -def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
> -def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
> -def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
> -}
> -
> -multiclass A64I_inttofp<bit o0, string asmop> {
> - def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
> - def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
> - def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
> - def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
> -}
> -
> -defm S : A64I_inttofp<0b0, "scvtf">;
> -defm U : A64I_inttofp<0b1, "ucvtf">;
> -
> -let Predicates = [HasFPARMv8] in {
> -def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
> -def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
> -def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
> -def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
> -def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
> -def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
> -def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
> -def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
> -}
> -
> -def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
> -def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
> -def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
> -def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
> -
> -let Predicates = [HasFPARMv8] in {
> -def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
> -def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
> -def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
> -def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
> -}
> -
> -def lane1_asmoperand : AsmOperandClass {
> - let Name = "Lane1";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "Lane1";
> -}
> -
> -def lane1 : Operand<i32> {
> - let ParserMatchClass = lane1_asmoperand;
> - let PrintMethod = "printBareImmOperand";
> -}
> -
> -let DecoderMethod = "DecodeFMOVLaneInstruction" in {
> - def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
> - (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
> - "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> - def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
> - (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
> - "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -}
> -
> -let Predicates = [HasFPARMv8] in {
> -def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
> - (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
> -
> -def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
> - (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point immediate instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FMOV
> -
> -def fpimm_asmoperand : AsmOperandClass {
> - let Name = "FMOVImm";
> - let ParserMethod = "ParseFPImmOperand";
> - let DiagnosticType = "FPImm";
> -}
> -
> -// The MCOperand for these instructions is the encoded 8-bit value.
> -def SDXF_fpimm : SDNodeXForm<fpimm, [{
> - uint32_t Imm8;
> - A64Imms::isFPImm(N->getValueAPF(), Imm8);
> - return CurDAG->getTargetConstant(Imm8, MVT::i32);
> -}]>;
> -
> -class fmov_operand<ValueType FT>
> - : Operand<i32>,
> - PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
> - SDXF_fpimm> {
> - let PrintMethod = "printFPImmOperand";
> - let ParserMatchClass = fpimm_asmoperand;
> -}
> -
> -def fmov32_operand : fmov_operand<f32>;
> -def fmov64_operand : fmov_operand<f64>;
> -
> -class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
> - Operand fmov_operand>
> - : A64I_fpimm<0b0, 0b0, type, 0b00000,
> - (outs Reg:$Rd),
> - (ins fmov_operand:$Imm8),
> - "fmov\t$Rd, $Imm8",
> - [(set VT:$Rd, fmov_operand:$Imm8)],
> - NoItinerary>,
> - Sched<[WriteFPALU]>;
> -
> -def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
> -def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
> -
> -//===----------------------------------------------------------------------===//
> -// Load-register (literal) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDR, LDRSW, PRFM
> -
> -def ldrlit_label_asmoperand : AsmOperandClass {
> - let Name = "LoadLitLabel";
> - let RenderMethod = "addLabelOperands<19, 4>";
> - let DiagnosticType = "Label";
> -}
> -
> -def ldrlit_label : Operand<i64> {
> - let EncoderMethod = "getLoadLitLabelOpValue";
> -
> - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<19, 4>";
> - let ParserMatchClass = ldrlit_label_asmoperand;
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -// Various instructions take an immediate value (a raw number is always
> -// accepted), but some values also have a symbolic name to make things easier.
> -// These operands and the associated functions abstract away the differences.
> -multiclass namedimm<string prefix, string mapper> {
> - def _asmoperand : AsmOperandClass {
> - let Name = "NamedImm" # prefix;
> - let PredicateMethod = "isUImm";
> - let RenderMethod = "addImmOperands";
> - let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
> - let DiagnosticType = "NamedImm_" # prefix;
> - }
> -
> - def _op : Operand<i32> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
> - let PrintMethod = "printNamedImmOperand<" # mapper # ">";
> - let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
> - }
> -}
> -
> -defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
> -
> -class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
> - list<dag> patterns = []>
> - : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
> - "ldr\t$Rt, $Imm19", patterns, NoItinerary>,
> - Sched<[WriteLd]>;
> -
> -let mayLoad = 1 in {
> - def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
> - def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
> -}
> -
> -let Predicates = [HasFPARMv8] in {
> -def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
> -def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
> -}
> -
> -let mayLoad = 1 in {
> - let Predicates = [HasFPARMv8] in {
> - def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
> - }
> -
> - def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
> - (outs GPR64:$Rt),
> - (ins ldrlit_label:$Imm19),
> - "ldrsw\t$Rt, $Imm19",
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def PRFM_lit : A64I_LDRlit<0b11, 0b0,
> - (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
> - "prfm\t$Rt, $Imm19",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store exclusive instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
> -// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
> -// STLRH, STLR, LDARB, LDARH, LDAR
> -
> -// Since these instructions have the undefined register bits set to 1 in
> -// their canonical form, we need a post encoder method to set those bits
> -// to 1 when encoding these instructions. We do this using the
> -// fixLoadStoreExclusive function. This function has template parameters:
> -//
> -// fixLoadStoreExclusive<int hasRs, int hasRt2>
> -//
> -// hasRs indicates that the instruction uses the Rs field, so we won't set
> -// it to 1 (and the same for Rt2). We don't need template parameters for
> -// the other register fields since Rt and Rn are always used.
> -
> -// This operand parses a GPR64xsp register, followed by an optional immediate
> -// #0.
> -def GPR64xsp0_asmoperand : AsmOperandClass {
> - let Name = "GPR64xsp0";
> - let PredicateMethod = "isWrappedReg";
> - let RenderMethod = "addRegOperands";
> - let ParserMethod = "ParseLSXAddressOperand";
> - // Diagnostics are provided by ParserMethod
> -}
> -
> -def GPR64xsp0 : RegisterOperand<GPR64xsp> {
> - let ParserMatchClass = GPR64xsp0_asmoperand;
> -}
> -
> -//===----------------------------------
> -// Store-exclusive (releasing & normal)
> -//===----------------------------------
> -
> -class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_stn <size,
> - opcode{2}, 0, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
> - pat, itin> {
> - let mayStore = 1;
> - let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
> - let Constraints = "@earlyclobber $Rs";
> -}
> -
> -multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
> - def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
> - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
> - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [],NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
> - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
> - (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -}
> -
> -defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
> -defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
> -
> -//===----------------------------------
> -// Loads
> -//===----------------------------------
> -
> -class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_tn <size,
> - opcode{2}, 1, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rt, [$Rn]"),
> - pat, itin> {
> - let mayLoad = 1;
> - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
> -}
> -
> -multiclass A64I_LRex<string asmstr, bits<3> opcode> {
> - def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
> - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
> - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
> - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
> - (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -}
> -
> -defm LDXR : A64I_LRex<"ldxr", 0b000>;
> -defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
> -defm LDAR : A64I_LRex<"ldar", 0b101>;
> -
> -class acquiring_load<PatFrag base>
> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{
> - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
> - return Ordering == Acquire || Ordering == SequentiallyConsistent;
> -}]>;
> -
> -def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
> -def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
> -def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
> -def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
> -
> -def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>;
> -def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
> -def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>;
> -def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
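The acquiring_load fragment matches exactly the acquire and seq_cst atomic
loads, i.e. the C11 loads one would expect to end up as LDAR (sketch):

  #include <stdatomic.h>

  static int load_acquire(_Atomic int *p) {
    return atomic_load_explicit(p, memory_order_acquire);
  }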
> -
> -//===----------------------------------
> -// Store-release (no exclusivity)
> -//===----------------------------------
> -
> -class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_tn <size,
> - opcode{2}, 0, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rt, [$Rn]"),
> - pat, itin> {
> - let mayStore = 1;
> - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
> -}
> -
> -class releasing_store<PatFrag base>
> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
> - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
> - return Ordering == Release || Ordering == SequentiallyConsistent;
> -}]>;
> -
> -def atomic_store_release_8 : releasing_store<atomic_store_8>;
> -def atomic_store_release_16 : releasing_store<atomic_store_16>;
> -def atomic_store_release_32 : releasing_store<atomic_store_32>;
> -def atomic_store_release_64 : releasing_store<atomic_store_64>;
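And the mirror image for stores: release and seq_cst atomic stores match
releasing_store and feed the STLR patterns below (sketch):

  #include <stdatomic.h>

  static void store_release(_Atomic int *p, int v) {
    atomic_store_explicit(p, v, memory_order_release);
  }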
> -
> -multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
> - def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
> - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
> - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
> - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
> - (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -}
> -
> -defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
> -
> -//===----------------------------------
> -// Store-exclusive pair (releasing & normal)
> -//===----------------------------------
> -
> -class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_stt2n <size,
> - opcode{2}, 0, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
> - pat, itin> {
> - let mayStore = 1;
> -}
> -
> -
> -multiclass A64I_SPex<string asmstr, bits<3> opcode> {
> - def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
> - (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
> - GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> -
> - def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
> - (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
> - GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> -}
> -
> -defm STXP : A64I_SPex<"stxp", 0b010>;
> -defm STLXP : A64I_SPex<"stlxp", 0b011>;
> -
> -//===----------------------------------
> -// Load-exclusive pair (acquiring & normal)
> -//===----------------------------------
> -
> -class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_tt2n <size,
> - opcode{2}, 1, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
> - pat, itin>{
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
> - let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
> -}
> -
> -multiclass A64I_LPex<string asmstr, bits<3> opcode> {
> - def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
> - (outs GPR32:$Rt, GPR32:$Rt2),
> - (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]>;
> -
> - def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
> - (outs GPR64:$Rt, GPR64:$Rt2),
> - (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]>;
> -}
> -
> -defm LDXP : A64I_LPex<"ldxp", 0b010>;
> -defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store register (unscaled immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and PRFUM
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (register offset) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (unsigned immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (immediate post-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (immediate pre-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
> -
> -// Note that patterns are much later on in a completely separate section (they
> -// need ADRPxi to be defined).
> -
> -//===-------------------------------
> -// 1. Various operands needed
> -//===-------------------------------
> -
> -//===-------------------------------
> -// 1.1 Unsigned 12-bit immediate operands
> -//===-------------------------------
> -// The addressing mode for these instructions consists of an unsigned 12-bit
> -// immediate which is scaled by the size of the memory access.
> -//
> -// We represent this in the MC layer by two operands:
> -// 1. A base register.
> -// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
> -// would have '1' in this field.
> -// This means that separate functions are needed for converting representations
> -// which *are* aware of the intended access size.
> -
> -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
> -// know the access size via some means. An isolated operand does not have this
> -// information unless told from here, which means we need separate tablegen
> -// Operands for each access size. This multiclass takes care of instantiating
> -// the correct template functions in the rest of the backend.
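The byte-offset/field relationship described above, as a small C sketch
(encode_uimm12 is illustrative, not a function in the backend):

  #include <stdbool.h>
  #include <stdint.h>

  static bool encode_uimm12(uint64_t byte_off, unsigned mem_size,
                            uint32_t *field) {
    if (byte_off % mem_size != 0)
      return false;                  /* must be a multiple of the access size */
    uint64_t scaled = byte_off / mem_size;  /* LDR x0,[x0,#8] -> field == 1 */
    if (scaled > 0xfff)
      return false;                  /* 12-bit field */
    *field = (uint32_t)scaled;
    return true;
  }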
> -
> -multiclass offsets_uimm12<int MemSize, string prefix> {
> - def uimm12_asmoperand : AsmOperandClass {
> - let Name = "OffsetUImm12_" # MemSize;
> - let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
> - let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
> - let DiagnosticType = "LoadStoreUImm12_" # MemSize;
> - }
> -
> - // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
> - // complicates things beyond TableGen's ken.
> - def uimm12 : Operand<i64>,
> - ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
> -
> - let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
> - let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
> - }
> -}
> -
> -defm byte_ : offsets_uimm12<1, "byte_">;
> -defm hword_ : offsets_uimm12<2, "hword_">;
> -defm word_ : offsets_uimm12<4, "word_">;
> -defm dword_ : offsets_uimm12<8, "dword_">;
> -defm qword_ : offsets_uimm12<16, "qword_">;
> -
> -//===-------------------------------
> -// 1.2 Signed 9-bit immediate operands
> -//===-------------------------------
> -
> -// The MCInst is expected to store the bit-wise encoding of the value,
> -// which amounts to lopping off the extended sign bits.
> -def SDXF_simm9 : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
> -}]>;
> -
> -def simm9_asmoperand : AsmOperandClass {
> - let Name = "SImm9";
> - let PredicateMethod = "isSImm<9>";
> - let RenderMethod = "addSImmOperands<9>";
> - let DiagnosticType = "LoadStoreSImm9";
> -}
> -
> -def simm9 : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
> - SDXF_simm9> {
> - let PrintMethod = "printOffsetSImm9Operand";
> - let ParserMatchClass = simm9_asmoperand;
> -}
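So simm9 accepts -256..255, and per SDXF_simm9 above the encoding is
literally the low nine bits (sketch):

  #include <stdint.h>

  static uint32_t encode_simm9(int64_t imm) {  /* caller checks -256..255 */
    return (uint32_t)(imm & 0x1ff);
  }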
> -
> -
> -//===-------------------------------
> -// 1.3 Register offset extensions
> -//===-------------------------------
> -
> -// The assembly-syntax for these addressing-modes is:
> -// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
> -//
> -// The essential semantics are:
> -// + <amount> is a shift: #<log(transfer size)> or #0
> -// + <R> can be W or X.
> -// + If <R> is W, <extend> can be UXTW or SXTW
> -// + If <R> is X, <extend> can be LSL or SXTX
> -//
> -// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
> -// which will need separate instructions for LLVM type-consistency. We'll also
> -// need separate operands, of course.
> -multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
> - string Rm, string prefix> {
> - def regext_asmoperand : AsmOperandClass {
> - let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
> - let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
> - let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
> - let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
> - }
> -
> - def regext : Operand<i64> {
> - let PrintMethod
> - = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
> -
> - let DecoderMethod = "DecodeAddrRegExtendOperand";
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # regext_asmoperand);
> - }
> -}
> -
> -multiclass regexts_wx<int MemSize, string prefix> {
> - // Rm is an X-register if LSL or SXTX are specified as the shift.
> - defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
> -
> - // Rm is a W-register if UXTW or SXTW are specified as the shift.
> - defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
> -}
> -
> -defm byte_ : regexts_wx<1, "byte_">;
> -defm hword_ : regexts_wx<2, "hword_">;
> -defm word_ : regexts_wx<4, "word_">;
> -defm dword_ : regexts_wx<8, "dword_">;
> -defm qword_ : regexts_wx<16, "qword_">;
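For reference, the effective-address computation behind the syntax described
above, in C (a sketch of the assumed semantics; s is #0 or #log2(transfer
size)):

  #include <stdint.h>

  static uint64_t ea_sxtw(uint64_t xn, int32_t wm, unsigned s) {
    return xn + ((uint64_t)(int64_t)wm << s);   /* [Xn, Wm, SXTW #s] */
  }
  static uint64_t ea_lsl(uint64_t xn, uint64_t xm, unsigned s) {
    return xn + (xm << s);                      /* [Xn, Xm, LSL #s] */
  }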
> -
> -
> -//===------------------------------
> -// 2. The instructions themselves.
> -//===------------------------------
> -
> -// We have the following instructions to implement:
> -// | | B | H | W | X |
> -// |-----------------+-------+-------+-------+--------|
> -// | unsigned str | STRB | STRH | STR | STR |
> -// | unsigned ldr | LDRB | LDRH | LDR | LDR |
> -// | signed ldr to W | LDRSB | LDRSH | - | - |
> -// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
> -
> -// This will instantiate the LDR/STR instructions you'd expect to use for an
> -// unsigned datatype (first two rows above) or floating-point register, which is
> -// reasonably uniform across all access sizes.
> -
> -
> -//===------------------------------
> -// 2.1 Regular instructions
> -//===------------------------------
> -
> -// This class covers the basic unsigned or irrelevantly-signed loads and stores,
> -// to general-purpose and floating-point registers.
> -
> -class AddrParams<string prefix> {
> - Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
> -
> - Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
> - Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
> -}
> -
> -def byte_addrparams : AddrParams<"byte">;
> -def hword_addrparams : AddrParams<"hword">;
> -def word_addrparams : AddrParams<"word">;
> -def dword_addrparams : AddrParams<"dword">;
> -def qword_addrparams : AddrParams<"qword">;
> -
> -multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
> - bit high_opc, string asmsuffix,
> - RegisterClass GPR, AddrParams params> {
> - // Unsigned immediate
> - def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - }
> - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - // Register offset (four of these: load/store and Wm/Xm).
> - let mayLoad = 1 in {
> - def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
> - (outs GPR:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
> - (outs GPR:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> - }
> - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> - let mayStore = 1 in {
> - def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
> - params.regextWm:$Ext),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> -
> - def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
> - params.regextXm:$Ext),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> - }
> - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> - // Unaligned immediate
> - def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - }
> - def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - // Post-indexed
> - def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
> - (outs GPR64xsp:$Rn_wb),
> - (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let mayStore = 1;
> -
> - // Decoder only needed for unpredictability checking (FIXME).
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - // Pre-indexed
> - def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
> - (outs GPR64xsp:$Rn_wb),
> - (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let mayStore = 1;
> -
> - // Decoder only needed for unpredictability checking (FIXME).
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> -}
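The writeback constraint "$Rn = $Rn_wb" in the indexed forms above encodes
the usual pre/post-index semantics; modelling the base register as *rn in C:

  #include <stdint.h>

  static uint32_t ldr_post(uint32_t **rn, int64_t imm) { /* ldr wt, [xn], #imm */
    uint32_t v = **rn;
    *rn = (uint32_t *)((char *)*rn + imm);
    return v;
  }
  static uint32_t ldr_pre(uint32_t **rn, int64_t imm) {  /* ldr wt, [xn, #imm]! */
    *rn = (uint32_t *)((char *)*rn + imm);
    return **rn;
  }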
> -
> -// STRB/LDRB: First define the instructions
> -defm LS8
> - : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
> -
> -// STRH/LDRH
> -defm LS16
> - : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
> -
> -
> -// STR/LDR to/from a W register
> -defm LS32
> - : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
> -
> -// STR/LDR to/from an X register
> -defm LS64
> - : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
> -
> -let Predicates = [HasFPARMv8] in {
> -// STR/LDR to/from a B register
> -defm LSFP8
> - : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
> -
> -// STR/LDR to/from an H register
> -defm LSFP16
> - : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
> -
> -// STR/LDR to/from an S register
> -defm LSFP32
> - : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
> -// STR/LDR to/from a D register
> -defm LSFP64
> - : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
> -// STR/LDR to/from a Q register
> -defm LSFP128
> - : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
> - qword_addrparams>;
> -}
> -
> -//===------------------------------
> -// 2.3 Signed loads
> -//===------------------------------
> -
> -// Byte and half-word signed loads can both go into either an X or a W register,
> -// so it's worth factoring out. Signed word loads don't fit because there is no
> -// W version.
> -multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
> - string prefix> {
> - // Unsigned offset
> - def w : A64I_LSunsigimm<size, 0b0, 0b11,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def x : A64I_LSunsigimm<size, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - // Register offset
> - let mayLoad = 1 in {
> - def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> - }
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> -
> - let mayLoad = 1 in {
> - // Unaligned offset
> - def w_U : A64I_LSunalimm<size, 0b0, 0b11,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -
> - def x_U : A64I_LSunalimm<size, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -
> -
> - // Post-indexed
> - def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
> - (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - // Pre-indexed
> - def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
> - (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> - } // let mayLoad = 1
> -}
> -
> -// LDRSB
> -defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
> -// LDRSH
> -defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
> -
> -// LDRSW: load a 32-bit register, sign-extending to 64-bits.
> -def LDRSWx
> - : A64I_LSunsigimm<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
> - "ldrsw\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -let mayLoad = 1 in {
> - def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
> - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
> - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -}
> -def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
> - (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
> -
> -
> -def LDURSWx
> - : A64I_LSunalimm<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldursw\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -def LDRSWx_PostInd
> - : A64I_LSpostind<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrsw\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> -}
> -
> -def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrsw\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> -}
> -
> -//===------------------------------
> -// 2.4 Prefetch operations
> -//===------------------------------
> -
> -def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
> - "prfm\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"prfm $Rt, [$Rn]",
> - (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -let mayLoad = 1 in {
> - def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn,
> - GPR32:$Rm, dword_Wm_regext:$Ext),
> - "prfm\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]>;
> - def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, dword_Xm_regext:$Ext),
> - "prfm\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]>;
> -}
> -
> -def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
> - (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> -
> -def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "prfum\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"prfum $Rt, [$Rn]",
> - (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
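> -// For example, "prfm pldl1keep, [x0, #8]" hints a load into L1 with the
> -// "keep" policy; prefetch_op parses the pldl1keep name into the Rt field
> -// rather than a register. The unscaled form accepts any simm9 offset, e.g.
> -// "prfum pldl1keep, [x0, #-3]".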
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store register (unprivileged) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
> -
> -// These instructions very much mirror the "unscaled immediate" loads, but since
> -// there are no floating-point variants we need to split them out into their own
> -// section to avoid instantiation of "ldtr d0, [sp]" etc.
> -
> -multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
> - string prefix> {
> - def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> -                   Sched<[WriteSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - }
> -
> - def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
> - (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> -
> - def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -}
> -
> -// STTRB/LDTRB: First define the instructions
> -defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
> -
> -// STTRH/LDTRH
> -defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
> -
> -// STTR/LDTR to/from a W register
> -defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
> -
> -// STTR/LDTR to/from an X register
> -defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
> -
> -// Now a class for the signed instructions that can go to either 32 or 64
> -// bits...
> -multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
> - let mayLoad = 1 in {
> - def w : A64I_LSunpriv<size, 0b0, 0b11,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -
> - def x : A64I_LSunpriv<size, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> - }
> -
> - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -}
> -
> -// LDTRSB
> -defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
> -// LDTRSH
> -defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
> -
> -// And finally LDTRSW which only goes to 64 bits.
> -def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtrsw\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store register pair (offset) instructions
> -//===----------------------------------------------------------------------===//
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register pair (post-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STP, LDP, LDPSW
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register pair (pre-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STP, LDP, LDPSW
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store non-temporal register pair (offset) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STNP, LDNP
> -
> -
> -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
> -// know the access size via some means. An isolated operand does not have this
> -// information unless told from here, which means we need separate tablegen
> -// Operands for each access size. This multiclass takes care of instantiating
> -// the correct template functions in the rest of the backend.
> -
> -multiclass offsets_simm7<string MemSize, string prefix> {
> - // The bare signed 7-bit immediate is used in post-indexed instructions, but
> -  // because of the scaling performed, a generic "simm7" operand isn't
> - // appropriate here either.
> - def simm7_asmoperand : AsmOperandClass {
> - let Name = "SImm7_Scaled" # MemSize;
> - let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
> - let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
> - let DiagnosticType = "LoadStoreSImm7_" # MemSize;
> - }
> -
> - def simm7 : Operand<i64> {
> - let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
> - }
> -}
> -
> -defm word_ : offsets_simm7<"4", "word_">;
> -defm dword_ : offsets_simm7<"8", "dword_">;
> -defm qword_ : offsets_simm7<"16", "qword_">;
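> -// Concretely, the scaling gives each operand a different accepted range:
> -// MemSize 4 covers -256..252 in steps of 4, MemSize 8 covers -512..504 in
> -// steps of 8 (e.g. "ldp x0, x1, [sp, #-16]"), and MemSize 16 covers
> -// -1024..1008 in steps of 16.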
> -
> -multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
> - Operand simm7, string prefix> {
> - def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
> - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
> - "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> -             Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> - def _LDR : A64I_LSPoffset<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> - def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
> - (outs GPR64xsp:$Rn_wb),
> - (ins SomeReg:$Rt, SomeReg:$Rt2,
> - GPR64xsp:$Rn,
> - simm7:$SImm7),
> - "stp\t$Rt, $Rt2, [$Rn], $SImm7",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let Constraints = "$Rn = $Rn_wb";
> -
> - // Decoder only needed for unpredictability checking (FIXME).
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
> - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
> - "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
> - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
> - "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> - def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> -}
> -
> -
> -defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
> -defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
> -
> -let Predicates = [HasFPARMv8] in {
> -defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
> -defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
> -defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
> - "LSFPPair128">;
> -}
> -
> -
> -def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
> - (outs GPR64:$Rt, GPR64:$Rt2),
> - (ins GPR64xsp:$Rn, word_simm7:$SImm7),
> - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> -}
> -def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
> - (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> -def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
> - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
> - (ins GPR64xsp:$Rn, word_simm7:$SImm7),
> - "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> -}
> -
> -def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
> - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
> - (ins GPR64xsp:$Rn, word_simm7:$SImm7),
> - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Logical (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
> -
> -multiclass logical_imm_operands<string prefix, string note,
> - int size, ValueType VT> {
> - def _asmoperand : AsmOperandClass {
> - let Name = "LogicalImm" # note # size;
> - let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
> - let RenderMethod = "addLogicalImmOperands<" # size # ">";
> - let DiagnosticType = "LogicalSecondSource";
> - }
> -
> - def _operand
> - : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
> - let PrintMethod = "printLogicalImmOperand<" # size # ">";
> - let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
> - }
> -}
> -
> -defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
> -defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
> -
> -// The mov versions only differ in assembly parsing, where they
> -// exclude values representable with either MOVZ or MOVN.
> -defm logical_imm32_mov
> - : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
> -defm logical_imm64_mov
> - : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
> -
> -
> -multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
> - def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
> - (ins GPR32:$Rn, logical_imm32_operand:$Imm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
> - [(set i32:$Rd,
> - (opnode i32:$Rn, logical_imm32_operand:$Imm))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
> - (ins GPR64:$Rn, logical_imm64_operand:$Imm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
> - [(set i64:$Rd,
> - (opnode i64:$Rn, logical_imm64_operand:$Imm))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -}
> -
> -defm AND : A64I_logimmSizes<0b00, "and", and>;
> -defm ORR : A64I_logimmSizes<0b01, "orr", or>;
> -defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
> -
> -let Defs = [NZCV] in {
> - def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, logical_imm32_operand:$Imm),
> - "ands\t$Rd, $Rn, $Imm",
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, logical_imm64_operand:$Imm),
> - "ands\t$Rd, $Rn, $Imm",
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -}
> -
> -def : InstAlias<"tst $Rn, $Imm",
> - (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
> -def : InstAlias<"tst $Rn, $Imm",
> - (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
> -// FIXME: these are sometimes canonical.
> -def : InstAlias<"mov $Rd, $Imm",
> - (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm), 0>;
> -def : InstAlias<"mov $Rd, $Imm",
> - (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm), 0>;
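> -// For example, "mov w0, #0x00ff00ff" isn't representable as a MOVZ or MOVN
> -// but is a valid bitmask immediate, so it matches here and is encoded as
> -// "orr w0, wzr, #0x00ff00ff".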
> -
> -//===----------------------------------------------------------------------===//
> -// Logical (shifted register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
> -
> -// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
> -// behaves differently for unsigned comparisons, so we defensively only allow
> -// signed or n/a as the operand. In practice "unsigned greater than 0" is "not
> -// equal to 0" and LLVM gives us this.
> -def signed_cond : PatLeaf<(cond), [{
> - return !isUnsignedIntSetCC(N->get());
> -}]>;
> -
> -
> -// These instructions share their "shift" operands with add/sub (shifted
> -// register instructions). They are defined there.
> -
> -// N.b. the commutable parameter is just !N. It will be first against the wall
> -// when the revolution comes.
> -multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
> - bit N, bit commutable,
> - string asmop, SDPatternOperator opfrag, ValueType ty,
> - RegisterClass GPR, list<Register> defs> {
> - let isCommutable = commutable, Defs = defs in {
> - def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _asr : A64I_logicalshift<sf, opc, 0b10, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _ror : A64I_logicalshift<sf, opc, 0b11, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -
> - def _noshift
> - : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
> - GPR:$Rm, 0)>;
> -
> - def : Pat<(opfrag ty:$Rn, ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
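> -// As a sketch of the expansion: "defm AND : logical_sizes<...>" below yields
> -// ANDxxx_lsl/_lsr/_asr/_ror plus the ANDwww_* equivalents, so
> -// "and x0, x1, x2, lsr #4" selects ANDxxx_lsr while a plain
> -// "and x0, x1, x2" uses the _noshift alias (ANDxxx_lsl with shift 0).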
> -
> -multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
> - string asmop, SDPatternOperator opfrag,
> - list<Register> defs> {
> - defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
> - commutable, asmop, opfrag, i64, GPR64, defs>;
> - defm www : logical_shifts<prefix # "www", 0b0, opc, N,
> - commutable, asmop, opfrag, i32, GPR32, defs>;
> -}
> -
> -
> -defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
> -defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
> -defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
> -defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
> - PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
> - [{ (void)N; return false; }]>,
> - [NZCV]>;
> -
> -defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (and node:$lhs, (not node:$rhs))>, []>;
> -defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (or node:$lhs, (not node:$rhs))>, []>;
> -defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (xor node:$lhs, (not node:$rhs))>, []>;
> -defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (and node:$lhs, (not node:$rhs)),
> - [{ (void)N; return false; }]>,
> - [NZCV]>;
> -
> -multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
> - let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
> - def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -
> - def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -
> - def _noshift : InstAlias<"tst $Rn, $Rm",
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
> -
> - def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
> -
> -defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
> -defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
> -
> -
> -multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
> - let isCommutable = 0, Rn = 0b11111 in {
> - def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -
> - def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (rotr ty:$Rm,
> -                                 !cast<Operand>("ror_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -
> - def _noshift : InstAlias<"mvn $Rn, $Rm",
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
> -
> - def : Pat<(not ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
> -}
> -
> -defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
> -defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
> -
> -def MOVxx : InstAlias<"mov $Rd, $Rm",
> -                      (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
> -def MOVww : InstAlias<"mov $Rd, $Rm",
> -                      (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
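> -// i.e. a plain register move is just an ORR with the zero register:
> -// "mov x0, x1" assembles identically to "orr x0, xzr, x1".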
> -
> -//===----------------------------------------------------------------------===//
> -// Move wide (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: MOVN, MOVZ, MOVK + MOV aliases
> -
> -// A wide variety of different relocations are needed for variants of these
> -// instructions, so it turns out that we need a different operand for all of
> -// them.
> -multiclass movw_operands<string prefix, string instname, int width> {
> - def _imm_asmoperand : AsmOperandClass {
> - let Name = instname # width # "Shifted" # shift;
> - let PredicateMethod = "is" # instname # width # "Imm";
> - let RenderMethod = "addMoveWideImmOperands";
> - let ParserMethod = "ParseImmWithLSLOperand";
> - let DiagnosticType = "MOVWUImm16";
> - }
> -
> - def _imm : Operand<i64> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
> - let PrintMethod = "printMoveWideImmOperand";
> - let EncoderMethod = "getMoveWideImmOpValue";
> - let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
> -
> - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
> - }
> -}
> -
> -defm movn32 : movw_operands<"movn32", "MOVN", 32>;
> -defm movn64 : movw_operands<"movn64", "MOVN", 64>;
> -defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
> -defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
> -defm movk32 : movw_operands<"movk32", "MOVK", 32>;
> -defm movk64 : movw_operands<"movk64", "MOVK", 64>;
> -
> -multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
> - dag ins64bit> {
> -
> - def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
> - !strconcat(asmop, "\t$Rd, $FullImm"),
> - [], NoItinerary>,
> - Sched<[WriteALU]> {
> - bits<18> FullImm;
> - let UImm16 = FullImm{15-0};
> - let Shift = FullImm{17-16};
> - }
> -
> - def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
> - !strconcat(asmop, "\t$Rd, $FullImm"),
> - [], NoItinerary>,
> - Sched<[WriteALU]> {
> - bits<18> FullImm;
> - let UImm16 = FullImm{15-0};
> - let Shift = FullImm{17-16};
> - }
> -}
> -
> -let isMoveImm = 1, isReMaterializable = 1,
> - isAsCheapAsAMove = 1, hasSideEffects = 0 in {
> - defm MOVN : A64I_movwSizes<0b00, "movn",
> - (ins movn32_imm:$FullImm),
> - (ins movn64_imm:$FullImm)>;
> -
> - // Some relocations are able to convert between a MOVZ and a MOVN. If these
> - // are applied the instruction must be emitted with the corresponding bits as
> - // 0, which means a MOVZ needs to override that bit from the default.
> - let PostEncoderMethod = "fixMOVZ" in
> - defm MOVZ : A64I_movwSizes<0b10, "movz",
> - (ins movz32_imm:$FullImm),
> - (ins movz64_imm:$FullImm)>;
> -}
> -
> -let Constraints = "$src = $Rd",
> - SchedRW = [WriteALU, ReadALU] in
> -defm MOVK : A64I_movwSizes<0b11, "movk",
> - (ins GPR32:$src, movk32_imm:$FullImm),
> - (ins GPR64:$src, movk64_imm:$FullImm)>;
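> -// The 18-bit $FullImm packs the 16-bit payload with the 2-bit hw shift, so
> -// e.g. "movz x0, #0x1234, lsl #16" has UImm16 = 0x1234 and Shift = 0b01,
> -// while "movk x0, #0xbeef, lsl #32" merges 16 bits into an existing value
> -// (hence the "$src = $Rd" constraint above).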
> -
> -
> -// And now the "MOV" aliases. These also need their own operands because what
> -// they accept is completely different to what the base instructions accept.
> -multiclass movalias_operand<string prefix, string basename,
> - string immpredicate, int width> {
> - def _asmoperand : AsmOperandClass {
> - let Name = basename # width # "MovAlias";
> - let PredicateMethod
> - = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
> - let RenderMethod
> - = "addMoveWideMovAliasOperands<" # width # ", "
> - # "A64Imms::" # immpredicate # ">";
> - }
> -
> - def _movimm : Operand<i64> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
> -
> - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
> - }
> -}
> -
> -defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
> -defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
> -defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
> -defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
> -
> -// FIXME: these are officially canonical aliases, but TableGen is too limited to
> -// print them at the moment. I believe in this case an "AliasPredicate" method
> -// will need to be implemented to allow it, as well as the more generally
> -// useful handling of non-register, non-constant operands.
> -class movalias<Instruction INST, RegisterClass GPR, Operand operand>
> - : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm), 0>;
> -
> -def : movalias<MOVZwii, GPR32, movz32_movimm>;
> -def : movalias<MOVZxii, GPR64, movz64_movimm>;
> -def : movalias<MOVNwii, GPR32, movn32_movimm>;
> -def : movalias<MOVNxii, GPR64, movn64_movimm>;
> -
> -def movw_addressref_g0 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<0>">;
> -def movw_addressref_g1 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<1>">;
> -def movw_addressref_g2 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<2>">;
> -def movw_addressref_g3 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<3>">;
> -
> -def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2,
> - movw_addressref_g1:$G1, movw_addressref_g0:$G0),
> - (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3),
> - movw_addressref_g2:$G2),
> - movw_addressref_g1:$G1),
> - movw_addressref_g0:$G0)>;
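> -// In assembly this is the usual large-model materialisation sequence,
> -// roughly:
> -//   movz x0, #:abs_g3:sym
> -//   movk x0, #:abs_g2_nc:sym
> -//   movk x0, #:abs_g1_nc:sym
> -//   movk x0, #:abs_g0_nc:sym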
> -
> -//===----------------------------------------------------------------------===//
> -// PC-relative addressing instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADR, ADRP
> -
> -def adr_label : Operand<i64> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
> -
> - // This label is a 21-bit offset from PC, unscaled
> - let PrintMethod = "printLabelOperand<21, 1>";
> - let ParserMatchClass = label_asmoperand<21, 1>;
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -def adrp_label_asmoperand : AsmOperandClass {
> - let Name = "AdrpLabel";
> - let RenderMethod = "addLabelOperands<21, 4096>";
> - let DiagnosticType = "Label";
> -}
> -
> -def adrp_label : Operand<i64> {
> - let EncoderMethod = "getAdrpLabelOpValue";
> -
> - // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
> - let PrintMethod = "printLabelOperand<21, 4096>";
> - let ParserMatchClass = adrp_label_asmoperand;
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -let hasSideEffects = 0 in {
> - def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
> - "adr\t$Rd, $Label", [], NoItinerary>,
> - Sched<[WriteALUs]>;
> -
> - def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
> - "adrp\t$Rd, $Label", [], NoItinerary>,
> - Sched<[WriteALUs]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// System instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
> -// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
> -
> -// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
> -def uimm3_asmoperand : AsmOperandClass {
> - let Name = "UImm3";
> - let PredicateMethod = "isUImm<3>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm3";
> -}
> -
> -def uimm3 : Operand<i32> {
> - let ParserMatchClass = uimm3_asmoperand;
> -}
> -
> -// The HINT alias can accept a simple unsigned 7-bit immediate.
> -def uimm7_asmoperand : AsmOperandClass {
> - let Name = "UImm7";
> - let PredicateMethod = "isUImm<7>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm7";
> -}
> -
> -def uimm7 : Operand<i32> {
> - let ParserMatchClass = uimm7_asmoperand;
> -}
> -
> -// The namedimm multiclass is defined with the prefetch operands. Most of these fit
> -// into the NamedImmMapper scheme well: they either accept a named operand or
> -// any immediate under a particular value (which may be 0, implying no immediate
> -// is allowed).
> -defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
> -defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
> -defm ic : namedimm<"ic", "A64IC::ICMapper">;
> -defm dc : namedimm<"dc", "A64DC::DCMapper">;
> -defm at : namedimm<"at", "A64AT::ATMapper">;
> -defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
> -
> -// However, MRS and MSR are more complicated for a few reasons:
> -// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
> -// implementation-defined effect
> -// * Most registers are shared, but some are read-only or write-only.
> -// * There is a variant of MSR which accepts the same register name (SPSel),
> -// but which would have a different encoding.
> -
> -// In principle these could be resolved with more complicated subclasses of
> -// NamedImmMapper, but that imposes an overhead on the other "named
> -// immediates", both concretely (virtual tables) and in unnecessary
> -// abstraction.
> -
> -// The solution adopted here is to take the MRS/MSR Mappers out of the usual
> -// hierarchy (they're not derived from NamedImmMapper) and to add logic for
> -// their special situation.
> -def mrs_asmoperand : AsmOperandClass {
> - let Name = "MRS";
> - let ParserMethod = "ParseSysRegOperand";
> - let DiagnosticType = "MRS";
> -}
> -
> -def mrs_op : Operand<i32> {
> - let ParserMatchClass = mrs_asmoperand;
> - let PrintMethod = "printMRSOperand";
> - let DecoderMethod = "DecodeMRSOperand";
> -}
> -
> -def msr_asmoperand : AsmOperandClass {
> - let Name = "MSRWithReg";
> -
> - // Note that SPSel is valid for both this and the pstate operands, but with
> - // different immediate encodings. This is why these operands provide a string
> - // AArch64Operand rather than an immediate. The overlap is small enough that
> - // it could be resolved with hackery now, but who can say in future?
> - let ParserMethod = "ParseSysRegOperand";
> - let DiagnosticType = "MSR";
> -}
> -
> -def msr_op : Operand<i32> {
> - let ParserMatchClass = msr_asmoperand;
> - let PrintMethod = "printMSROperand";
> - let DecoderMethod = "DecodeMSROperand";
> -}
> -
> -def pstate_asmoperand : AsmOperandClass {
> - let Name = "MSRPState";
> - // See comment above about parser.
> - let ParserMethod = "ParseSysRegOperand";
> - let DiagnosticType = "MSR";
> -}
> -
> -def pstate_op : Operand<i32> {
> - let ParserMatchClass = pstate_asmoperand;
> - let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
> - let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
> -}
> -
> -// When <CRn> is specified, an assembler should accept something like "C4", not
> -// the usual "#4" immediate.
> -def CRx_asmoperand : AsmOperandClass {
> - let Name = "CRx";
> - let PredicateMethod = "isUImm<4>";
> - let RenderMethod = "addImmOperands";
> - let ParserMethod = "ParseCRxOperand";
> - // Diagnostics are handled in all cases by ParseCRxOperand.
> -}
> -
> -def CRx : Operand<i32> {
> - let ParserMatchClass = CRx_asmoperand;
> - let PrintMethod = "printCRxOperand";
> -}
> -
> -
> -// Finally, we can start defining the instructions.
> -
> -// HINT is straightforward, with a few aliases.
> -def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
> - [], NoItinerary> {
> - bits<7> UImm7;
> - let CRm = UImm7{6-3};
> - let Op2 = UImm7{2-0};
> -
> - let Op0 = 0b00;
> - let Op1 = 0b011;
> - let CRn = 0b0010;
> - let Rt = 0b11111;
> -}
> -
> -def : InstAlias<"nop", (HINTi 0)>;
> -def : InstAlias<"yield", (HINTi 1)>;
> -def : InstAlias<"wfe", (HINTi 2)>;
> -def : InstAlias<"wfi", (HINTi 3)>;
> -def : InstAlias<"sev", (HINTi 4)>;
> -def : InstAlias<"sevl", (HINTi 5)>;
> -
> -// Quite a few instructions then follow a similar pattern of fixing common
> -// fields in the bitpattern, so we'll define a helper class for them.
> -class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
> - Operand operand, string asmop>
> - : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
> - [], NoItinerary> {
> - let Op0 = op0;
> - let Op1 = op1;
> - let CRn = crn;
> - let Op2 = op2;
> - let Rt = 0b11111;
> -}
> -
> -
> -def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
> -def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
> -def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
> -def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
> -
> -def : InstAlias<"clrex", (CLREXi 0b1111)>;
> -def : InstAlias<"isb", (ISBi 0b1111)>;
> -
> -// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for Linux SMP
> -// configurations at least.
> -def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
> -
> -// Any SYS bitpattern can be represented with a complex and opaque "SYS"
> -// instruction.
> -def SYSiccix : A64I_system<0b0, (outs),
> - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
> - uimm3:$Op2, GPR64:$Rt),
> - "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
> - [], NoItinerary> {
> - let Op0 = 0b01;
> -}
> -
> -// The Xt argument to the generic SYS instruction can be omitted, whether or
> -// not doing so makes sense for the particular operation.
> -def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
> - (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
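> -// For example, the cache-maintenance alias "ic iallu" (defined below) has
> -// the same encoding as the generic "sys #0, c7, c5, #0".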
> -
> -
> -// But many SYS operations have aliases, which obviously don't fit into the
> -// generic form above:
> -class SYSalias<dag ins, string asmstring>
> - : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
> - let isAsmParserOnly = 1;
> -
> - bits<14> SysOp;
> - let Op0 = 0b01;
> - let Op1 = SysOp{13-11};
> - let CRn = SysOp{10-7};
> - let CRm = SysOp{6-3};
> - let Op2 = SysOp{2-0};
> -}
> -
> -def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
> -
> -def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
> - let Rt = 0b11111;
> -}
> -
> -def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
> -def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
> -
> -def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
> -
> -def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
> - let Rt = 0b11111;
> -}
> -
> -
> -def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
> - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
> - "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
> - [], NoItinerary> {
> - let Op0 = 0b01;
> -}
> -
> -// The instructions themselves are rather simple for MSR and MRS.
> -def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
> - "msr\t$SysReg, $Rt", [], NoItinerary> {
> - bits<16> SysReg;
> - let Op0 = SysReg{15-14};
> - let Op1 = SysReg{13-11};
> - let CRn = SysReg{10-7};
> - let CRm = SysReg{6-3};
> - let Op2 = SysReg{2-0};
> -}
> -
> -def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
> - "mrs\t$Rt, $SysReg", [], NoItinerary> {
> - bits<16> SysReg;
> - let Op0 = SysReg{15-14};
> - let Op1 = SysReg{13-11};
> - let CRn = SysReg{10-7};
> - let CRm = SysReg{6-3};
> - let Op2 = SysReg{2-0};
> -}
> -
> -def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
> - "msr\t$PState, $CRm", [], NoItinerary> {
> - bits<6> PState;
> -
> - let Op0 = 0b00;
> - let Op1 = PState{5-3};
> - let CRn = 0b0100;
> - let Op2 = PState{2-0};
> - let Rt = 0b11111;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Test & branch (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: TBZ, TBNZ
> -
> -// The bit to test is a simple unsigned 6-bit immediate in the X-register
> -// versions.
> -def uimm6 : Operand<i64> {
> - let ParserMatchClass = uimm6_asmoperand;
> -}
> -
> -def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
> -
> -def tbimm_target : Operand<OtherVT> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
> -
> - // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<14, 4>";
> - let ParserMatchClass = label_wid14_scal4_asmoperand;
> -
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
> -def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
> -
> -// These instructions correspond to patterns involving "and" with a power of
> -// two, which we need to be able to select.
> -def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
> -def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
> -
> -let isBranch = 1, isTerminator = 1 in {
> - def TBZxii : A64I_TBimm<0b0, (outs),
> - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
> - "tbz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
> - A64eq, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]>;
> -
> - def TBNZxii : A64I_TBimm<0b1, (outs),
> - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
> - "tbnz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
> - A64ne, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]>;
> -
> -
> -  // Note that these instructions overlap with the 64-bit patterns above. This
> -  // is intentional: "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" do the
> -  // same thing and are both valid assembly. They also both have
> - // sensible DAG patterns.
> - def TBZwii : A64I_TBimm<0b0, (outs),
> - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
> - "tbz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
> - A64eq, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]> {
> - let Imm{5} = 0b0;
> - }
> -
> - def TBNZwii : A64I_TBimm<0b1, (outs),
> - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
> - "tbnz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
> - A64ne, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]> {
> - let Imm{5} = 0b0;
> - }
> -}
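> -// As an example of the patterns above, source like
> -//   if ((x & 0x20) == 0) goto out;
> -// can be selected straight to "tbz w0, #5, out": tstb32_pat recognises the
> -// power-of-two mask and supplies the bit number as the immediate.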
> -
> -//===----------------------------------------------------------------------===//
> -// Unconditional branch (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: B, BL
> -
> -def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
> -
> -def bimm_target : Operand<OtherVT> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
> -
> - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<26, 4>";
> - let ParserMatchClass = label_wid26_scal4_asmoperand;
> -
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -def blimm_target : Operand<i64> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
> -
> - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<26, 4>";
> - let ParserMatchClass = label_wid26_scal4_asmoperand;
> -
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
> - : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
> - !strconcat(asmop, "\t$Label"), patterns,
> - NoItinerary>,
> - Sched<[WriteBr]>;
> -
> -let isBranch = 1 in {
> - def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
> - let isTerminator = 1;
> - let isBarrier = 1;
> - }
> -
> - let SchedRW = [WriteBrL] in {
> - def BLimm : A64I_BimmImpl<0b1, "bl",
> - [(AArch64Call tglobaladdr:$Label)], blimm_target> {
> - let isCall = 1;
> - let Defs = [X30];
> - }
> - }
> -}
> -
> -def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Unconditional branch (register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: BR, BLR, RET, ERET, DRPS.
> -
> -// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
> -// at the moment.
> -class A64I_BregImpl<bits<4> opc,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin = NoItinerary>
> - : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
> - outs, ins, asmstr, patterns, itin>,
> - Sched<[WriteBr]> {
> - let isBranch = 1;
> - let isIndirectBranch = 1;
> -}
> -
> -// Note that these are not marked isCall or isReturn because as far as LLVM is
> -// concerned they're not. "ret" is just another jump unless it has been selected
> -// by LLVM as the function's return.
> -
> -let isBranch = 1 in {
> - def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
> - "br\t$Rn", [(brind i64:$Rn)]> {
> - let isBarrier = 1;
> - let isTerminator = 1;
> - }
> -
> - let SchedRW = [WriteBrL] in {
> - def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
> - "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
> - let isBarrier = 0;
> - let isCall = 1;
> - let Defs = [X30];
> - }
> - }
> -
> - def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
> - "ret\t$Rn", []> {
> - let isBarrier = 1;
> - let isTerminator = 1;
> - let isReturn = 1;
> - }
> -
> - // Create a separate pseudo-instruction for codegen to use so that we don't
> - // flag x30 as used in every function. It'll be restored before the RET by the
> - // epilogue if it's legitimately used.
> - def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
> - let isTerminator = 1;
> - let isBarrier = 1;
> - let isReturn = 1;
> - }
> -
> - def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
> - let Rn = 0b11111;
> - let isBarrier = 1;
> - let isTerminator = 1;
> - let isReturn = 1;
> - }
> -
> - def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
> - let Rn = 0b11111;
> - let isBarrier = 1;
> - }
> -}
> -
> -def RETAlias : InstAlias<"ret", (RETx X30)>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Address generation patterns
> -//===----------------------------------------------------------------------===//
> -
> -// Primary method of address generation for the small/absolute memory model is
> -// an ADRP/ADR pair:
> -// ADRP x0, some_variable
> -// ADD x0, x0, #:lo12:some_variable
> -//
> -// The load/store elision of the ADD is accomplished when selecting
> -// addressing-modes. This just mops up the cases where that doesn't work and we
> -// really need an address in some register.
> -
> -// This wrapper applies a LO12 modifier to the address. Otherwise we could just
> -// use the same address.
> -
> -class ADRP_ADD<SDNode Wrapper, SDNode addrop>
> - : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
> - (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
> -
> -def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
> -def : ADRP_ADD<A64WrapperSmall, texternalsym>;
> -def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
> -def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
> -def : ADRP_ADD<A64WrapperSmall, tjumptable>;
> -def : ADRP_ADD<A64WrapperSmall, tconstpool>;
> -
> -//===----------------------------------------------------------------------===//
> -// GOT access patterns
> -//===----------------------------------------------------------------------===//
> -
> -class GOTLoadSmall<SDNode addrfrag>
> - : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
> - (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
> -
> -def : GOTLoadSmall<texternalsym>;
> -def : GOTLoadSmall<tglobaladdr>;
> -def : GOTLoadSmall<tglobaltlsaddr>;
> -
> -//===----------------------------------------------------------------------===//
> -// Tail call handling
> -//===----------------------------------------------------------------------===//
> -
> -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
> - def TC_RETURNdi
> - : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
> - [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
> -
> - def TC_RETURNxi
> - : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
> - [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
> -}
> -
> -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
> - Uses = [XSP] in {
> - def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
> - (Bimm bimm_target:$Label)>;
> -
> - def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
> - (BRx GPR64:$Rd)>;
> -}
> -
> -
> -def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
> - (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Thread local storage
> -//===----------------------------------------------------------------------===//
> -
> -// This is a pseudo-instruction representing the ".tlsdesccall" directive in
> -// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
> -// current location. It should always be immediately followed by a BLR
> -// instruction, and is intended solely for relaxation by the linker.
> -
> -def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
> -
> -def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
> - let hasSideEffects = 1;
> -}
> -
> -def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
> - [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
> - let isCall = 1;
> - let Defs = [X30];
> -}
> -
> -def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
> - (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Bitfield patterns
> -//===----------------------------------------------------------------------===//
> -
> -def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
> -}]>;
> -
> -def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
> -}]>;
> -
> -def bfi_width_to_imms : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
> -}]>;
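> -// Worked example: inserting an 8-bit field at bit 16 of a W register
> -// (lsb = 16, width = 8) gives ImmR = (32 - 16) % 32 = 16 and
> -// ImmS = 8 - 1 = 7, i.e. "bfi w0, w1, #16, #8".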
> -
> -
> -// The simpler patterns deal with cases where no AND mask is actually needed
> -// (either all bits are used or the low 32 bits are used).
> -let AddedComplexity = 10 in {
> -
> -def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
> - (BFIxxii $src, $Rn,
> - (bfi64_lsb_to_immr (i64 imm:$ImmR)),
> - (bfi_width_to_imms (i64 imm:$ImmS)))>;
> -
> -def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
> - (BFIwwii $src, $Rn,
> - (bfi32_lsb_to_immr (i64 imm:$ImmR)),
> - (bfi_width_to_imms (i64 imm:$ImmS)))>;
> -
> -
> -def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
> - (i64 4294967295)),
> - (SUBREG_TO_REG (i64 0),
> - (BFIwwii (EXTRACT_SUBREG $src, sub_32),
> - (EXTRACT_SUBREG $Rn, sub_32),
> - (bfi32_lsb_to_immr (i64 imm:$ImmR)),
> - (bfi_width_to_imms (i64 imm:$ImmS))),
> - sub_32)>;
> -
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Miscellaneous patterns
> -//===----------------------------------------------------------------------===//
> -
> -// Truncation from 64 to 32-bits just involves renaming your register.
> -def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
> -
> -// Similarly, extension where we don't care about the high bits is
> -// just a rename.
> -def : Pat<(i64 (anyext i32:$val)),
> - (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
> -
> -// SELECT instructions providing f128 types need to be handled by a
> -// pseudo-instruction since the eventual code will need to introduce basic
> -// blocks and control flow.
> -def F128CSEL : PseudoInst<(outs FPR128:$Rd),
> - (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
> - [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
> - let Uses = [NZCV];
> - let usesCustomInserter = 1;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Load/store patterns
> -//===----------------------------------------------------------------------===//
> -
> -// There are lots of patterns here, because we need to allow at least three
> -// parameters to vary independently.
> -// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
> -// 2. LLVM source: zextloadi8, anyextloadi8, ...
> -// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
> -//
> -// The biggest problem turns out to be the address-generation variable. At the
> -// point of instantiation we need to produce two DAGs, one for the pattern and
> -// one for the instruction. Doing this at the lowest level of classes doesn't
> -// work.
> -//
> -// Consider the simple uimm12 addressing mode, and the desire to match both (add
> -// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
> -// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
> -// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
> -// operation, and PatFrags are for selection, not output.
> -//
> -// As a result, the address-generation patterns are the final
> -// instantiations. However, we do still need to vary the operand for the address
> -// further down (at the point we're deciding A64WrapperSmall, we don't know
> -// the memory width of the operation).
> -
> -//===------------------------------
> -// 1. Basic infrastructural defs
> -//===------------------------------
> -
> -// First, some simple classes for !foreach and !subst to use:
> -class Decls {
> - dag pattern;
> -}
> -
> -def decls : Decls;
> -def ALIGN;
> -def INST;
> -def OFFSET;
> -def SHIFT;
> -
> -// You can't use !subst on an actual immediate, but you *can* use it on an
> -// operand record that happens to match a single immediate. So we do.
> -def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
> -def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
> -def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
> -def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
> -def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
> -
> -// If the low bits of a pointer are known to be 0 then an "or" is just as good
> -// as addition for computing an offset. This fragment forwards that check for
> -// TableGen's use.
> -def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
> -[{
> - return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
> -}]>;
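> -// For example, if %ptr is known to be 8-byte aligned then (or %ptr, 4)
> -// computes %ptr + 4, so the base+offset load/store patterns below can be
> -// reused unchanged for the "or" form.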
> -
> -// Load/store (unsigned immediate) operations with relocations against global
> -// symbols (for lo12) are only valid if those symbols have correct alignment
> -// (since the immediate offset is divided by the access scale, it can't have a
> -// remainder).
> -//
> -// The guaranteed alignment is provided as part of the WrapperSmall
> -// operation, and checked against one of these.
> -def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
> -def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
> -def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
> -def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
> -def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
> -
> -// "Normal" load/store instructions can be used on atomic operations, provided
> -// the ordering parameter is at most "monotonic". Anything above that needs
> -// special handling with acquire/release instructions.
> -class simple_load<PatFrag base>
> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{
> - return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
> -}]>;
> -
> -def atomic_load_simple_i8 : simple_load<atomic_load_8>;
> -def atomic_load_simple_i16 : simple_load<atomic_load_16>;
> -def atomic_load_simple_i32 : simple_load<atomic_load_32>;
> -def atomic_load_simple_i64 : simple_load<atomic_load_64>;
> -
> -class simple_store<PatFrag base>
> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
> - return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
> -}]>;
> -
> -def atomic_store_simple_i8 : simple_store<atomic_store_8>;
> -def atomic_store_simple_i16 : simple_store<atomic_store_16>;
> -def atomic_store_simple_i32 : simple_store<atomic_store_32>;
> -def atomic_store_simple_i64 : simple_store<atomic_store_64>;
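> -// Thus a monotonic atomic load of i32 selects the ordinary 32-bit LDR via
> -// the patterns below; acquire/release orderings instead use the dedicated
> -// LDAR/STLR definitions elsewhere in this file.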
> -
> -//===------------------------------
> -// 2. UImm12 and SImm9
> -//===------------------------------
> -
> -// These instructions have two operands providing the address so they can be
> -// treated similarly for most purposes.
> -
> -//===------------------------------
> -// 2.1 Base patterns covering extend/truncate semantics
> -//===------------------------------
> -
> -// Atomic patterns can be shared between integer operations of all sizes; a
> -// quick multiclass here allows reuse.
> -multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag address, ValueType transty,
> - ValueType sty> {
> - def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
> - (LOAD Base, Offset)>;
> -
> - def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
> - (STORE $Rt, Base, Offset)>;
> -}
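
(Spelling out the !cast concatenation for one case: with sty = i8, and
writing Base/Offset/address for the dags the caller passes in, e.g. the
LS8_LDR/LS8_STR instantiation from section 2.2 below, the two Pats come
out as:)

  def : Pat<(atomic_load_simple_i8 address), (LS8_LDR Base, Offset)>;
  def : Pat<(atomic_store_simple_i8 address, i32:$Rt),
            (LS8_STR $Rt, Base, Offset)>;
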
> -
> -// Instructions accessing a memory chunk smaller than a register (or, in a
> -// pinch, the same size) have a characteristic set of patterns they want to
> -// match: extending loads and truncating stores. This class deals with the
> -// sign-neutral version of those patterns.
> -//
> -// It will be instantiated across multiple addressing-modes.
> -multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset,
> - dag address, ValueType sty>
> - : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
> - def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
> -
> - def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
> -
> - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
> - // register was actually set.
> - def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
> -
> - def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
> -
> - def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
> - (STORE $Rt, Base, Offset)>;
> -
> - // For truncating store from 64-bits, we have to manually tell LLVM to
> - // ignore the high bits of the x register.
> - def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
> - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
> -}
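
(The SUBREG_TO_REG dance, hand-expanded for sty = i8 with LOAD/Base/Offset
standing for the parameters: a load into a W register already zeroes bits
63:32 of the corresponding X register, and SUBREG_TO_REG is how the
pattern tells LLVM that, so no separate extend gets emitted:)

  def : Pat<(i64 (zextloadi8 address)),
            (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
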
> -
> -// Next come patterns for sign-extending loads.
> -multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
> - dag address, ValueType sty> {
> - def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
> -
> - def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
> -
> -}
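
(With T = "B" and U = "" as in the uimm12 instantiation in section 2.2,
the two !cast strings resolve to LDRSBw and LDRSBx, i.e. hand-expanded:)

  def : Pat<(i32 (sextloadi8 address)), (LDRSBw Base, Offset)>;
  def : Pat<(i64 (sextloadi8 address)), (LDRSBx Base, Offset)>;
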
> -
> -// And finally, the "natural-width" loads and stores.
> -multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag address, ValueType sty> {
> - def : Pat<(sty (load address)), (LOAD Base, Offset)>;
> - def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
> -}
> -
> -// Integer operations also get atomic instructions to select for.
> -multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag address, ValueType sty>
> - : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
> - ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
> -
> -//===------------------------------
> -// 2.2. Addressing-mode instantiations
> -//===------------------------------
> -
> -multiclass uimm12_pats<dag address, dag Base, dag Offset> {
> - defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, byte_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, byte_uimm12,
> - !subst(ALIGN, any_align, decls.pattern))),
> - i8>;
> - defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, hword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, hword_uimm12,
> - !subst(ALIGN, min_align2, decls.pattern))),
> - i16>;
> - defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern))),
> - i32>;
> -
> - defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern))),
> - i32>;
> -
> - defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, dword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, dword_uimm12,
> - !subst(ALIGN, min_align8, decls.pattern))),
> - i64>;
> -
> - defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, hword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, hword_uimm12,
> - !subst(ALIGN, min_align2, decls.pattern))),
> - f16>;
> -
> - defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern))),
> - f32>;
> -
> - defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, dword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, dword_uimm12,
> - !subst(ALIGN, min_align8, decls.pattern))),
> - f64>;
> -
> - defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, qword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, qword_uimm12,
> - !subst(ALIGN, min_align16, decls.pattern))),
> - f128>;
> -
> - defm : load_signed_pats<"B", "", Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, byte_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, byte_uimm12,
> - !subst(ALIGN, any_align, decls.pattern))),
> - i8>;
> -
> - defm : load_signed_pats<"H", "", Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, hword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, hword_uimm12,
> - !subst(ALIGN, min_align2, decls.pattern))),
> - i16>;
> -
> - def : Pat<(sextloadi32 !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern)))),
> - (LDRSWx Base, !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)))>;
> -}
> -
> -// Straightforward patterns of last resort: a pointer with or without an
> -// appropriate offset.
> -defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
> -defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
> - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
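
(Hand-tracing one arm to make the !foreach/!subst machinery concrete: for
the i16 case of the instantiation just above, OFFSET becomes hword_uimm12
and ALIGN simply never appears in this address, so ls_small_pats
effectively receives:)

  defm : ls_small_pats<LS16_LDR, LS16_STR,
                       (i64 i64:$Rn),
                       (i64 hword_uimm12:$UImm12),
                       (add i64:$Rn, hword_uimm12:$UImm12),
                       i16>;
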
> -
> -// The offset could be hidden behind an "or", of course:
> -defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
> - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
> -
> -// Global addresses under the small-absolute model should use these
> -// instructions. There are ELF relocations specifically for this purpose.
> -defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
> - (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
> -
> -defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
> - ALIGN),
> - (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
> -
> -// External symbols that make it this far should also get standard relocations.
> -defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
> - ALIGN),
> - (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
> -
> -defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
> - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
> -
> -// For now, we also want to use uimm12 instructions for local variables.
> -def tframeindex_XFORM : SDNodeXForm<frameindex, [{
> - int FI = cast<FrameIndexSDNode>(N)->getIndex();
> - return CurDAG->getTargetFrameIndex(FI, MVT::i64);
> -}]>;
> -
> -defm : uimm12_pats<(i64 frameindex:$Rn),
> - (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
> -
> -// These can be much simpler than uimm12 because we don't have to change the
> -// operand type (e.g. LDURB and LDURH take the same operands).
> -multiclass simm9_pats<dag address, dag Base, dag Offset> {
> - defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
> - defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
> -
> - defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
> - defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
> -
> - defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
> - defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
> - defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
> - defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
> - f128>;
> -
> - def : Pat<(i64 (zextloadi32 address)),
> - (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
> -
> - def : Pat<(truncstorei32 i64:$Rt, address),
> - (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
> -
> - defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
> - defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
> - def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
> -}
> -
> -defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
> - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
> -
> -defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
> - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
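
(On when these fire instead of the uimm12 patterns: the scaled unsigned
forms can't encode negative or non-multiple-of-scale offsets, so those
addresses fall through to the unscaled LDUR/STUR family. An illustrative
trace of mine, with SDXF_simm9 doing the operand conversion:)

  // A load at base - 8 fails every uimm12 predicate (offset is negative),
  // so the simm9 pattern matches and selects the unscaled form, roughly:
  //   (i64 (load (add i64:$Rn, -8)))  -->  (LS64_LDUR $Rn, -8)
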
> -
> -
> -//===------------------------------
> -// 3. Register offset patterns
> -//===------------------------------
> -
> -// Atomic patterns can be shared between integer operations of all sizes; a
> -// quick multiclass here allows reuse.
> -multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag Extend, dag address,
> - ValueType transty, ValueType sty> {
> - def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
> - (LOAD Base, Offset, Extend)>;
> -
> - def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
> - (STORE $Rt, Base, Offset, Extend)>;
> -}
> -
> -// The register-offset instructions take three operands giving the address,
> -// and have an annoying split between instructions where Rm is 32-bit and
> -// 64-bit, so we need a special hierarchy to describe them. Other than that,
> -// the same operations should be supported as for simm9 and uimm12 addressing.
> -
> -multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset, dag Extend,
> - dag address, ValueType sty>
> - : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
> - def : Pat<(!cast<SDNode>(zextload # sty) address),
> - (LOAD Base, Offset, Extend)>;
> -
> - def : Pat<(!cast<SDNode>(extload # sty) address),
> - (LOAD Base, Offset, Extend)>;
> -
> - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
> - // register was actually set.
> - def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
> -
> - def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
> -
> - def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
> - (STORE $Rt, Base, Offset, Extend)>;
> -
> - // For truncating store from 64-bits, we have to manually tell LLVM to
> - // ignore the high bits of the x register.
> - def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
> - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
> -
> -}
> -
> -// Next come patterns for sign-extending loads.
> -multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
> - dag address, ValueType sty> {
> - def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
> - Base, Offset, Extend)>;
> -
> - def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
> - Base, Offset, Extend)>;
> -}
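
(Same concatenation game as before: with T = "B" and Rm = "Wm" these
resolve to LDRSBw_Wm_RegOffset and LDRSBx_Wm_RegOffset, i.e.:)

  def : Pat<(i32 (sextloadi8 address)),
            (LDRSBw_Wm_RegOffset Base, Offset, Extend)>;
  def : Pat<(i64 (sextloadi8 address)),
            (LDRSBx_Wm_RegOffset Base, Offset, Extend)>;
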
> -
> -// And finally, the "natural-width" loads and stores.
> -multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset, dag Extend, dag address,
> - ValueType sty> {
> - def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
> - def : Pat<(store sty:$Rt, address),
> - (STORE $Rt, Base, Offset, Extend)>;
> -}
> -
> -multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset, dag Extend, dag address,
> - ValueType sty>
> - : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
> - ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
> -
> -multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
> - dag Extend> {
> - defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq0, decls.pattern)),
> - i8>;
> - defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq1, decls.pattern)),
> - i16>;
> - defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern)),
> - i32>;
> -
> - defm : ro_int_neutral_pats<
> - !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern)),
> - i32>;
> -
> - defm : ro_int_neutral_pats<
> - !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq3, decls.pattern)),
> - i64>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq1, decls.pattern)),
> - f16>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern)),
> - f32>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq3, decls.pattern)),
> - f64>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq4, decls.pattern)),
> - f128>;
> -
> - defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq0, decls.pattern)),
> - i8>;
> -
> - defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq1, decls.pattern)),
> - i16>;
> -
> - def : Pat<(sextloadi32 !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern))),
> - (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
> - Base, Offset, Extend)>;
> -}
> -
> -
> -// Finally we're in a position to tell LLVM exactly what addresses are reachable
> -// using register-offset instructions. Essentially a base plus a possibly
> -// extended, possibly shifted (by access size) offset.
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
> -
> -defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
> - (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
> -
> -defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
> - (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
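
(One nit while it's in front of me: the bare extend immediates above are
opaque. Inferred from the pairings, not verified against the encodings:)

  // Apparent meaning of the extend operands; the low bit says the offset
  // is scaled by the access size (the shl forms):
  //   (i64 6) / (i64 7)  sign-extended i32 offset (SXTW), unscaled/scaled
  //   (i64 2) / (i64 3)  zero-extended i32 or plain i64 offset, unscaled/scaled
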
> -
> -//===----------------------------------------------------------------------===//
> -// Advanced SIMD (NEON) Support
> -//
> -
> -include "AArch64InstrNEON.td"
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits