[llvm] r209576 - AArch64/ARM64: remove AArch64 from tree prior to renaming ARM64.
Rafael Espíndola
rafael.espindola at gmail.com
Sat May 24 15:39:52 PDT 2014
Congratulations!
355 files changed, 73 insertions(+), 67373 deletions(-)
On 24 May 2014 08:42, Tim Northover <tnorthover at apple.com> wrote:
> Author: tnorthover
> Date: Sat May 24 07:42:26 2014
> New Revision: 209576
>
> URL: http://llvm.org/viewvc/llvm-project?rev=209576&view=rev
> Log:
> AArch64/ARM64: remove AArch64 from tree prior to renaming ARM64.
>
> I'm doing this in two phases for a better "git blame" record. This
> commit removes the previous AArch64 backend and redirects all
> functionality to ARM64. It also deduplicates test-lines and removes
> orphaned AArch64 tests.
>
> The next step will be "git mv ARM64 AArch64" and rewire most of the
> tests.
>
> Hopefully LLVM is still functional, though it would be even better if
> no-one ever had to care because the rename happens straight
> afterwards.
>
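> For the record, the two phases correspond to something like this (a
> sketch only; the real commits also rewire the configure/CMake plumbing
> and the tests listed below):
>
>   # Phase 1 (this commit): remove the old backend outright.
>   git rm -r lib/Target/AArch64 include/llvm/IR/IntrinsicsAArch64.td
>
>   # Phase 2 (the follow-up): move ARM64 into the vacated name.
>   git mv lib/Target/ARM64 lib/Target/AArch64
>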
> Removed:
> llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
> llvm/trunk/lib/Target/AArch64/AArch64.h
> llvm/trunk/lib/Target/AArch64/AArch64.td
> llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
> llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h
> llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp
> llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td
> llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
> llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
> llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
> llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
> llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> llvm/trunk/lib/Target/AArch64/AArch64InstrNEON.td
> llvm/trunk/lib/Target/AArch64/AArch64MCInstLower.cpp
> llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.td
> llvm/trunk/lib/Target/AArch64/AArch64Schedule.td
> llvm/trunk/lib/Target/AArch64/AArch64ScheduleA53.td
> llvm/trunk/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64SelectionDAGInfo.h
> llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
> llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
> llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetObjectFile.cpp
> llvm/trunk/lib/Target/AArch64/AArch64TargetObjectFile.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
> llvm/trunk/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
> llvm/trunk/lib/Target/AArch64/AsmParser/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/AsmParser/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/AsmParser/Makefile
> llvm/trunk/lib/Target/AArch64/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
> llvm/trunk/lib/Target/AArch64/Disassembler/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/Disassembler/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/Disassembler/Makefile
> llvm/trunk/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
> llvm/trunk/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
> llvm/trunk/lib/Target/AArch64/InstPrinter/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/InstPrinter/Makefile
> llvm/trunk/lib/Target/AArch64/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/MCTargetDesc/Makefile
> llvm/trunk/lib/Target/AArch64/Makefile
> llvm/trunk/lib/Target/AArch64/README.txt
> llvm/trunk/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
> llvm/trunk/lib/Target/AArch64/TargetInfo/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/TargetInfo/Makefile
> llvm/trunk/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
> llvm/trunk/lib/Target/AArch64/Utils/AArch64BaseInfo.h
> llvm/trunk/lib/Target/AArch64/Utils/CMakeLists.txt
> llvm/trunk/lib/Target/AArch64/Utils/LLVMBuild.txt
> llvm/trunk/lib/Target/AArch64/Utils/Makefile
> llvm/trunk/test/CodeGen/AArch64/andCmpBrToTBZ.ll
> llvm/trunk/test/CodeGen/AArch64/concatvector-bugs.ll
> llvm/trunk/test/CodeGen/AArch64/fp128.ll
> llvm/trunk/test/CodeGen/AArch64/global_merge_1.ll
> llvm/trunk/test/CodeGen/AArch64/i128-shift.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-modifiers.ll
> llvm/trunk/test/CodeGen/AArch64/large-frame.ll
> llvm/trunk/test/CodeGen/AArch64/lit.local.cfg
> llvm/trunk/test/CodeGen/AArch64/literal_pools_int.ll
> llvm/trunk/test/CodeGen/AArch64/misched-basic-A53.ll
> llvm/trunk/test/CodeGen/AArch64/named-reg-alloc.ll
> llvm/trunk/test/CodeGen/AArch64/named-reg-notareg.ll
> llvm/trunk/test/CodeGen/AArch64/neon-2velem-high.ll
> llvm/trunk/test/CodeGen/AArch64/neon-2velem.ll
> llvm/trunk/test/CodeGen/AArch64/neon-3vdiff.ll
> llvm/trunk/test/CodeGen/AArch64/neon-aba-abd.ll
> llvm/trunk/test/CodeGen/AArch64/neon-across.ll
> llvm/trunk/test/CodeGen/AArch64/neon-add-pairwise.ll
> llvm/trunk/test/CodeGen/AArch64/neon-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-bsl.ll
> llvm/trunk/test/CodeGen/AArch64/neon-copy.ll
> llvm/trunk/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
> llvm/trunk/test/CodeGen/AArch64/neon-crypto.ll
> llvm/trunk/test/CodeGen/AArch64/neon-facge-facgt.ll
> llvm/trunk/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
> llvm/trunk/test/CodeGen/AArch64/neon-halving-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-load-store-v1i32.ll
> llvm/trunk/test/CodeGen/AArch64/neon-max-min-pairwise.ll
> llvm/trunk/test/CodeGen/AArch64/neon-max-min.ll
> llvm/trunk/test/CodeGen/AArch64/neon-misc-scalar.ll
> llvm/trunk/test/CodeGen/AArch64/neon-misc.ll
> llvm/trunk/test/CodeGen/AArch64/neon-mul-div.ll
> llvm/trunk/test/CodeGen/AArch64/neon-rounding-halving-add.ll
> llvm/trunk/test/CodeGen/AArch64/neon-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-saturating-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-saturating-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-abs.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-compare.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-cvt.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-ext.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-fabd.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-fcvt.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-mul.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-neg.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-recip.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll
> llvm/trunk/test/CodeGen/AArch64/neon-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst-one.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-ldst.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-shift.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-tbl.ll
> llvm/trunk/test/CodeGen/AArch64/neon-simd-vget.ll
> llvm/trunk/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
> llvm/trunk/test/CodeGen/AArch64/neon-v1i1-setcc.ll
> llvm/trunk/test/CodeGen/AArch64/neon-vector-list-spill.ll
> llvm/trunk/test/CodeGen/AArch64/regress-wzr-allocatable.ll
> llvm/trunk/test/CodeGen/AArch64/sext_inreg.ll
> llvm/trunk/test/CodeGen/AArch64/stackpointer.ll
> llvm/trunk/test/CodeGen/AArch64/tls-dynamic-together.ll
> llvm/trunk/test/CodeGen/AArch64/tls-dynamics.ll
> llvm/trunk/test/CodeGen/AArch64/tls-execs.ll
> llvm/trunk/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll
> llvm/trunk/test/CodeGen/AArch64/variadic.ll
> llvm/trunk/test/DebugInfo/AArch64/cfi-frame.ll
> llvm/trunk/test/DebugInfo/AArch64/variable-loc.ll
> llvm/trunk/test/MC/AArch64/elf-reloc-addend.s
> Modified:
> llvm/trunk/CMakeLists.txt
> llvm/trunk/autoconf/configure.ac
> llvm/trunk/configure
> llvm/trunk/include/llvm/IR/Intrinsics.td
> llvm/trunk/lib/Target/ARM64/ARM64AsmPrinter.cpp
> llvm/trunk/lib/Target/ARM64/ARM64TargetMachine.cpp
> llvm/trunk/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
> llvm/trunk/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp
> llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp
> llvm/trunk/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h
> llvm/trunk/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp
> llvm/trunk/lib/Target/LLVMBuild.txt
> llvm/trunk/test/CodeGen/AArch64/128bit_load_store.ll
> llvm/trunk/test/CodeGen/AArch64/adc.ll
> llvm/trunk/test/CodeGen/AArch64/addsub-shifted.ll
> llvm/trunk/test/CodeGen/AArch64/addsub.ll
> llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll
> llvm/trunk/test/CodeGen/AArch64/alloca.ll
> llvm/trunk/test/CodeGen/AArch64/analyze-branch.ll
> llvm/trunk/test/CodeGen/AArch64/assertion-rc-mismatch.ll
> llvm/trunk/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
> llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
> llvm/trunk/test/CodeGen/AArch64/basic-pic.ll
> llvm/trunk/test/CodeGen/AArch64/bitfield-insert-0.ll
> llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll
> llvm/trunk/test/CodeGen/AArch64/bitfield.ll
> llvm/trunk/test/CodeGen/AArch64/blockaddress.ll
> llvm/trunk/test/CodeGen/AArch64/bool-loads.ll
> llvm/trunk/test/CodeGen/AArch64/breg.ll
> llvm/trunk/test/CodeGen/AArch64/callee-save.ll
> llvm/trunk/test/CodeGen/AArch64/code-model-large-abs.ll
> llvm/trunk/test/CodeGen/AArch64/compare-branch.ll
> llvm/trunk/test/CodeGen/AArch64/complex-copy-noneon.ll
> llvm/trunk/test/CodeGen/AArch64/cond-sel.ll
> llvm/trunk/test/CodeGen/AArch64/cpus.ll
> llvm/trunk/test/CodeGen/AArch64/directcond.ll
> llvm/trunk/test/CodeGen/AArch64/dp-3source.ll
> llvm/trunk/test/CodeGen/AArch64/dp1.ll
> llvm/trunk/test/CodeGen/AArch64/dp2.ll
> llvm/trunk/test/CodeGen/AArch64/eliminate-trunc.ll
> llvm/trunk/test/CodeGen/AArch64/extern-weak.ll
> llvm/trunk/test/CodeGen/AArch64/extract.ll
> llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll
> llvm/trunk/test/CodeGen/AArch64/fastcc.ll
> llvm/trunk/test/CodeGen/AArch64/fcmp.ll
> llvm/trunk/test/CodeGen/AArch64/fcvt-fixed.ll
> llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll
> llvm/trunk/test/CodeGen/AArch64/flags-multiuse.ll
> llvm/trunk/test/CodeGen/AArch64/floatdp_1source.ll
> llvm/trunk/test/CodeGen/AArch64/floatdp_2source.ll
> llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll
> llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll
> llvm/trunk/test/CodeGen/AArch64/fp128-folding.ll
> llvm/trunk/test/CodeGen/AArch64/fpimm.ll
> llvm/trunk/test/CodeGen/AArch64/frameaddr.ll
> llvm/trunk/test/CodeGen/AArch64/free-zext.ll
> llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll
> llvm/trunk/test/CodeGen/AArch64/func-calls.ll
> llvm/trunk/test/CodeGen/AArch64/global-alignment.ll
> llvm/trunk/test/CodeGen/AArch64/got-abuse.ll
> llvm/trunk/test/CodeGen/AArch64/i128-align.ll
> llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll
> llvm/trunk/test/CodeGen/AArch64/init-array.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
> llvm/trunk/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
> llvm/trunk/test/CodeGen/AArch64/jump-table.ll
> llvm/trunk/test/CodeGen/AArch64/large-consts.ll
> llvm/trunk/test/CodeGen/AArch64/ldst-regoffset.ll
> llvm/trunk/test/CodeGen/AArch64/ldst-unscaledimm.ll
> llvm/trunk/test/CodeGen/AArch64/ldst-unsignedimm.ll
> llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll
> llvm/trunk/test/CodeGen/AArch64/local_vars.ll
> llvm/trunk/test/CodeGen/AArch64/logical-imm.ll
> llvm/trunk/test/CodeGen/AArch64/logical_shifted_reg.ll
> llvm/trunk/test/CodeGen/AArch64/mature-mc-support.ll
> llvm/trunk/test/CodeGen/AArch64/movw-consts.ll
> llvm/trunk/test/CodeGen/AArch64/movw-shift-encoding.ll
> llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll
> llvm/trunk/test/CodeGen/AArch64/neon-bitcast.ll
> llvm/trunk/test/CodeGen/AArch64/neon-bitwise-instructions.ll
> llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll
> llvm/trunk/test/CodeGen/AArch64/neon-diagnostics.ll
> llvm/trunk/test/CodeGen/AArch64/neon-extract.ll
> llvm/trunk/test/CodeGen/AArch64/neon-fma.ll
> llvm/trunk/test/CodeGen/AArch64/neon-fpround_f128.ll
> llvm/trunk/test/CodeGen/AArch64/neon-idiv.ll
> llvm/trunk/test/CodeGen/AArch64/neon-mla-mls.ll
> llvm/trunk/test/CodeGen/AArch64/neon-mov.ll
> llvm/trunk/test/CodeGen/AArch64/neon-or-combine.ll
> llvm/trunk/test/CodeGen/AArch64/neon-perm.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
> llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll
> llvm/trunk/test/CodeGen/AArch64/neon-shift-left-long.ll
> llvm/trunk/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
> llvm/trunk/test/CodeGen/AArch64/pic-eh-stubs.ll
> llvm/trunk/test/CodeGen/AArch64/ragreedy-csr.ll
> llvm/trunk/test/CodeGen/AArch64/regress-bitcast-formals.ll
> llvm/trunk/test/CodeGen/AArch64/regress-f128csel-flags.ll
> llvm/trunk/test/CodeGen/AArch64/regress-fp128-livein.ll
> llvm/trunk/test/CodeGen/AArch64/regress-tail-livereg.ll
> llvm/trunk/test/CodeGen/AArch64/regress-tblgen-chains.ll
> llvm/trunk/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
> llvm/trunk/test/CodeGen/AArch64/returnaddr.ll
> llvm/trunk/test/CodeGen/AArch64/setcc-takes-i32.ll
> llvm/trunk/test/CodeGen/AArch64/sibling-call.ll
> llvm/trunk/test/CodeGen/AArch64/sincos-expansion.ll
> llvm/trunk/test/CodeGen/AArch64/sincospow-vector-expansion.ll
> llvm/trunk/test/CodeGen/AArch64/tail-call.ll
> llvm/trunk/test/CodeGen/AArch64/tst-br.ll
> llvm/trunk/test/CodeGen/AArch64/zero-reg.ll
> llvm/trunk/test/DebugInfo/AArch64/lit.local.cfg
> llvm/trunk/test/MC/AArch64/adrp-relocation.s
> llvm/trunk/test/MC/AArch64/basic-a64-diagnostics.s
> llvm/trunk/test/MC/AArch64/basic-a64-instructions.s
> llvm/trunk/test/MC/AArch64/basic-pic.s
> llvm/trunk/test/MC/AArch64/elf-extern.s
> llvm/trunk/test/MC/AArch64/elf-objdump.s
> llvm/trunk/test/MC/AArch64/elf-reloc-addsubimm.s
> llvm/trunk/test/MC/AArch64/elf-reloc-ldrlit.s
> llvm/trunk/test/MC/AArch64/elf-reloc-ldstunsimm.s
> llvm/trunk/test/MC/AArch64/elf-reloc-movw.s
> llvm/trunk/test/MC/AArch64/elf-reloc-pcreladdressing.s
> llvm/trunk/test/MC/AArch64/elf-reloc-tstb.s
> llvm/trunk/test/MC/AArch64/elf-reloc-uncondbrimm.s
> llvm/trunk/test/MC/AArch64/gicv3-regs-diagnostics.s
> llvm/trunk/test/MC/AArch64/gicv3-regs.s
> llvm/trunk/test/MC/AArch64/inline-asm-modifiers.s
> llvm/trunk/test/MC/AArch64/jump-table.s
> llvm/trunk/test/MC/AArch64/lit.local.cfg
> llvm/trunk/test/MC/AArch64/mapping-across-sections.s
> llvm/trunk/test/MC/AArch64/mapping-within-section.s
> llvm/trunk/test/MC/AArch64/neon-2velem.s
> llvm/trunk/test/MC/AArch64/neon-3vdiff.s
> llvm/trunk/test/MC/AArch64/neon-aba-abd.s
> llvm/trunk/test/MC/AArch64/neon-across.s
> llvm/trunk/test/MC/AArch64/neon-add-pairwise.s
> llvm/trunk/test/MC/AArch64/neon-add-sub-instructions.s
> llvm/trunk/test/MC/AArch64/neon-bitwise-instructions.s
> llvm/trunk/test/MC/AArch64/neon-compare-instructions.s
> llvm/trunk/test/MC/AArch64/neon-crypto.s
> llvm/trunk/test/MC/AArch64/neon-diagnostics.s
> llvm/trunk/test/MC/AArch64/neon-extract.s
> llvm/trunk/test/MC/AArch64/neon-facge-facgt.s
> llvm/trunk/test/MC/AArch64/neon-frsqrt-frecp.s
> llvm/trunk/test/MC/AArch64/neon-halving-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-max-min-pairwise.s
> llvm/trunk/test/MC/AArch64/neon-max-min.s
> llvm/trunk/test/MC/AArch64/neon-mla-mls-instructions.s
> llvm/trunk/test/MC/AArch64/neon-mov.s
> llvm/trunk/test/MC/AArch64/neon-mul-div-instructions.s
> llvm/trunk/test/MC/AArch64/neon-perm.s
> llvm/trunk/test/MC/AArch64/neon-rounding-halving-add.s
> llvm/trunk/test/MC/AArch64/neon-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-saturating-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-saturating-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-saturating-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-abs.s
> llvm/trunk/test/MC/AArch64/neon-scalar-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-mla.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-mul.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s
> llvm/trunk/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s
> llvm/trunk/test/MC/AArch64/neon-scalar-compare.s
> llvm/trunk/test/MC/AArch64/neon-scalar-cvt.s
> llvm/trunk/test/MC/AArch64/neon-scalar-dup.s
> llvm/trunk/test/MC/AArch64/neon-scalar-extract-narrow.s
> llvm/trunk/test/MC/AArch64/neon-scalar-fp-compare.s
> llvm/trunk/test/MC/AArch64/neon-scalar-mul.s
> llvm/trunk/test/MC/AArch64/neon-scalar-neg.s
> llvm/trunk/test/MC/AArch64/neon-scalar-recip.s
> llvm/trunk/test/MC/AArch64/neon-scalar-reduce-pairwise.s
> llvm/trunk/test/MC/AArch64/neon-scalar-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-saturating-add-sub.s
> llvm/trunk/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-saturating-shift.s
> llvm/trunk/test/MC/AArch64/neon-scalar-shift-imm.s
> llvm/trunk/test/MC/AArch64/neon-scalar-shift.s
> llvm/trunk/test/MC/AArch64/neon-shift-left-long.s
> llvm/trunk/test/MC/AArch64/neon-shift.s
> llvm/trunk/test/MC/AArch64/neon-simd-copy.s
> llvm/trunk/test/MC/AArch64/neon-simd-ldst-multi-elem.s
> llvm/trunk/test/MC/AArch64/neon-simd-ldst-one-elem.s
> llvm/trunk/test/MC/AArch64/neon-simd-misc.s
> llvm/trunk/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s
> llvm/trunk/test/MC/AArch64/neon-simd-shift.s
> llvm/trunk/test/MC/AArch64/neon-sxtl.s
> llvm/trunk/test/MC/AArch64/neon-tbl.s
> llvm/trunk/test/MC/AArch64/neon-uxtl.s
> llvm/trunk/test/MC/AArch64/noneon-diagnostics.s
> llvm/trunk/test/MC/AArch64/optional-hash.s
> llvm/trunk/test/MC/AArch64/tls-relocs.s
> llvm/trunk/test/MC/AArch64/trace-regs-diagnostics.s
> llvm/trunk/test/MC/AArch64/trace-regs.s
> llvm/trunk/test/MC/Disassembler/AArch64/lit.local.cfg
> llvm/trunk/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
>
> Modified: llvm/trunk/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/CMakeLists.txt?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/CMakeLists.txt (original)
> +++ llvm/trunk/CMakeLists.txt Sat May 24 07:42:26 2014
> @@ -127,7 +127,6 @@ set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BIN
> set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
>
> set(LLVM_ALL_TARGETS
> - AArch64
> ARM64
> ARM
> CppBackend
> @@ -144,7 +143,7 @@ set(LLVM_ALL_TARGETS
> )
>
> # List of targets with JIT support:
> -set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM64 ARM Mips SystemZ)
> +set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM64 ARM Mips SystemZ)
>
> set(LLVM_TARGETS_TO_BUILD "all"
> CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
>
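> With AArch64 gone from LLVM_ALL_TARGETS, a build that names its targets
> explicitly should now spell this backend ARM64. A minimal sketch using
> the LLVM_TARGETS_TO_BUILD cache variable from the hunk above:
>
>   cmake -DLLVM_TARGETS_TO_BUILD="ARM64;X86" ../llvm
>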
> Modified: llvm/trunk/autoconf/configure.ac
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/autoconf/configure.ac (original)
> +++ llvm/trunk/autoconf/configure.ac Sat May 24 07:42:26 2014
> @@ -421,7 +421,7 @@ AC_CACHE_CHECK([target architecture],[ll
> powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
> arm64*-*) llvm_cv_target_arch="ARM64" ;;
> arm*-*) llvm_cv_target_arch="ARM" ;;
> - aarch64*-*) llvm_cv_target_arch="AArch64" ;;
> + aarch64*-*) llvm_cv_target_arch="ARM64" ;;
> mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
> mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
> xcore-*) llvm_cv_target_arch="XCore" ;;
> @@ -457,7 +457,7 @@ case $host in
> powerpc*-*) host_arch="PowerPC" ;;
> arm64*-*) host_arch="ARM64" ;;
> arm*-*) host_arch="ARM" ;;
> - aarch64*-*) host_arch="AArch64" ;;
> + aarch64*-*) host_arch="ARM64" ;;
> mips-* | mips64-*) host_arch="Mips" ;;
> mipsel-* | mips64el-*) host_arch="Mips" ;;
> xcore-*) host_arch="XCore" ;;
> @@ -786,7 +786,6 @@ else
> PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
> x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
> ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
> - AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
> Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
> XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
> MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
> @@ -797,7 +796,7 @@ else
> esac
> fi
>
> -TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
> +TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
> AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT)
>
> dnl Allow enablement of building and installing docs
> @@ -950,7 +949,7 @@ if test "$llvm_cv_enable_crash_overrides
> fi
>
> dnl List all possible targets
> -ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> +ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> AC_SUBST(ALL_TARGETS,$ALL_TARGETS)
>
> dnl Allow specific targets to be specified for building (or not)
> @@ -971,7 +970,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> @@ -990,7 +989,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
>
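> Note that after this hunk both spellings of the --enable-targets value
> select the same backend, so existing build invocations keep working; a
> sketch:
>
>   ./configure --enable-targets=aarch64   # now maps to ARM64
>   ./configure --enable-targets=arm64     # unchanged
>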
> Modified: llvm/trunk/configure
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/configure?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/configure (original)
> +++ llvm/trunk/configure Sat May 24 07:42:26 2014
> @@ -4153,7 +4153,7 @@ else
> powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
> arm64*-*) llvm_cv_target_arch="ARM64" ;;
> arm*-*) llvm_cv_target_arch="ARM" ;;
> - aarch64*-*) llvm_cv_target_arch="AArch64" ;;
> + aarch64*-*) llvm_cv_target_arch="ARM64" ;;
> mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
> mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
> xcore-*) llvm_cv_target_arch="XCore" ;;
> @@ -4190,7 +4190,7 @@ case $host in
> powerpc*-*) host_arch="PowerPC" ;;
> arm64*-*) host_arch="ARM64" ;;
> arm*-*) host_arch="ARM" ;;
> - aarch64*-*) host_arch="AArch64" ;;
> + aarch64*-*) host_arch="ARM64" ;;
> mips-* | mips64-*) host_arch="Mips" ;;
> mipsel-* | mips64el-*) host_arch="Mips" ;;
> xcore-*) host_arch="XCore" ;;
> @@ -5103,8 +5103,6 @@ else
> ;;
> ARM) TARGET_HAS_JIT=1
> ;;
> - AArch64) TARGET_HAS_JIT=0
> - ;;
> Mips) TARGET_HAS_JIT=1
> ;;
> XCore) TARGET_HAS_JIT=0
> @@ -5122,7 +5120,7 @@ else
> esac
> fi
>
> -TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
> +TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
> TARGETS_WITH_JIT=$TARGETS_WITH_JIT
>
>
> @@ -5359,7 +5357,7 @@ _ACEOF
>
> fi
>
> -ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> +ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
> ALL_TARGETS=$ALL_TARGETS
>
>
> @@ -5382,7 +5380,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> @@ -5401,7 +5399,7 @@ case "$enableval" in
> x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
> PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
> - AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
> + AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
> ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
> Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
> XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
>
> Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=209576&r1=209575&r2=209576&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
> +++ llvm/trunk/include/llvm/IR/Intrinsics.td Sat May 24 07:42:26 2014
> @@ -534,7 +534,6 @@ include "llvm/IR/IntrinsicsPowerPC.td"
> include "llvm/IR/IntrinsicsX86.td"
> include "llvm/IR/IntrinsicsARM.td"
> include "llvm/IR/IntrinsicsARM64.td"
> -include "llvm/IR/IntrinsicsAArch64.td"
> include "llvm/IR/IntrinsicsXCore.td"
> include "llvm/IR/IntrinsicsHexagon.td"
> include "llvm/IR/IntrinsicsNVVM.td"
>
> Removed: llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td (removed)
> @@ -1,407 +0,0 @@
> -//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines all of the AArch64-specific intrinsics.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -//===----------------------------------------------------------------------===//
> -// Advanced SIMD (NEON)
> -
> -let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
> -
> -// Vector Absolute Compare (Floating Point)
> -def int_aarch64_neon_vacgeq :
> - Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vacgtq :
> - Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
> -
> -// Vector saturating accumulate
> -def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
> -def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;
> -
> -// Vector Bitwise reverse
> -def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;
> -
> -// Vector extract and narrow
> -def int_aarch64_neon_xtn :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Vector floating-point convert
> -def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
> -def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
> -def int_aarch64_neon_vcvtxn :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vcvtzs :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vcvtzu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Vector maxNum (Floating Point)
> -def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
> -
> -// Vector minNum (Floating Point)
> -def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
> -
> -// Vector Pairwise maxNum (Floating Point)
> -def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
> -
> -// Vector Pairwise minNum (Floating Point)
> -def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
> -
> -// Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
> -def int_aarch64_neon_vmulx :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;
> -
> -class Neon_N2V_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
> - [IntrNoMem]>;
> -class Neon_N3V_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
> - [IntrNoMem]>;
> -class Neon_N2V_Narrow_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty],
> - [LLVMExtendedType<0>, llvm_i32_ty],
> - [IntrNoMem]>;
> -
> -// Vector rounding shift right by immediate (Signed)
> -def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
> -def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
> -def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;
> -
> -def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
> -def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;
> -
> -def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
> -def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
> -
> -// Vector across
> -class Neon_Across_Intrinsic
> - : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_smaxv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_umaxv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_sminv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_uminv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_vaddv : Neon_Across_Intrinsic;
> -def int_aarch64_neon_vmaxv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vminv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vmaxnmv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vminnmv :
> - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -
> -// Vector Table Lookup.
> -def int_aarch64_neon_vtbl1 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbl2 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbl3 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
> - LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbl4 :
> - Intrinsic<[llvm_anyvector_ty],
> - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
> - llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Vector Table Extension.
> -// Some elements of the destination vector may not be updated, so the original
> -// value of that vector is passed as the first argument. The next 1-4
> -// arguments after that are the table.
> -def int_aarch64_neon_vtbx1 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbx2 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
> - LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbx3 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
> - llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
> -
> -def int_aarch64_neon_vtbx4 :
> - Intrinsic<[llvm_anyvector_ty],
> - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
> - llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Vector Load/store
> -def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
> - [llvm_ptr_ty, llvm_i32_ty],
> - [IntrReadArgMem]>;
> -def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
> - LLVMMatchType<0>],
> - [llvm_ptr_ty, llvm_i32_ty],
> - [IntrReadArgMem]>;
> -def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
> - LLVMMatchType<0>, LLVMMatchType<0>],
> - [llvm_ptr_ty, llvm_i32_ty],
> - [IntrReadArgMem]>;
> -
> -def int_aarch64_neon_vst1x2 : Intrinsic<[],
> - [llvm_ptr_ty, llvm_anyvector_ty,
> - LLVMMatchType<0>, llvm_i32_ty],
> - [IntrReadWriteArgMem]>;
> -def int_aarch64_neon_vst1x3 : Intrinsic<[],
> - [llvm_ptr_ty, llvm_anyvector_ty,
> - LLVMMatchType<0>, LLVMMatchType<0>,
> - llvm_i32_ty], [IntrReadWriteArgMem]>;
> -def int_aarch64_neon_vst1x4 : Intrinsic<[],
> - [llvm_ptr_ty, llvm_anyvector_ty,
> - LLVMMatchType<0>, LLVMMatchType<0>,
> - LLVMMatchType<0>, llvm_i32_ty],
> - [IntrReadWriteArgMem]>;
> -
> -// Scalar Add
> -def int_aarch64_neon_vaddds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vadddu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -
> -// Scalar Sub
> -def int_aarch64_neon_vsubds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vsubdu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -
> -// Scalar Shift
> -// Scalar Shift Left
> -def int_aarch64_neon_vshlds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vshldu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Scalar Saturating Shift Left
> -def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
> -def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;
> -
> -// Scalar Shift Rounding Left
> -def int_aarch64_neon_vrshlds :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vrshldu :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Scalar Saturating Rounding Shift Left
> -def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
> -def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
> -
> -// Scalar Reduce Pairwise Add.
> -def int_aarch64_neon_vpadd :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
> -def int_aarch64_neon_vpfadd :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Reduce Pairwise Floating Point Max/Min.
> -def int_aarch64_neon_vpmax :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vpmin :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
> -def int_aarch64_neon_vpfmaxnm :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -def int_aarch64_neon_vpfminnm :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Signed Integer Convert To Floating-point
> -def int_aarch64_neon_vcvtint2fps :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Unsigned Integer Convert To Floating-point
> -def int_aarch64_neon_vcvtint2fpu :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Convert
> -def int_aarch64_neon_fcvtxn :
> - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtns :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtnu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtps :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtpu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtms :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtmu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtas :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtau :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtzs :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -def int_aarch64_neon_fcvtzu :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Estimate.
> -def int_aarch64_neon_vrecpe :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Exponent
> -def int_aarch64_neon_vrecpx :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Square Root Estimate
> -def int_aarch64_neon_vrsqrte :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Step
> -def int_aarch64_neon_vrecps :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Scalar Floating-point Reciprocal Square Root Step
> -def int_aarch64_neon_vrsqrts :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Compare with vector operands.
> -class Neon_Cmp_Intrinsic :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
> - [IntrNoMem]>;
> -
> -// Floating-point compare with scalar operands.
> -class Neon_Float_Cmp_Intrinsic :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_anyfloat_ty],
> - [IntrNoMem]>;
> -
> -// Scalar Compare Equal
> -def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fceq : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Greater-Than or Equal
> -def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcge : Neon_Float_Cmp_Intrinsic;
> -def int_aarch64_neon_fchs : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Less-Than or Equal
> -def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fclez : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Less-Than
> -def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcltz : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Greater-Than
> -def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcgt : Neon_Float_Cmp_Intrinsic;
> -def int_aarch64_neon_fchi : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Compare Bitwise Test Bits
> -def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;
> -
> -// Scalar Floating-point Absolute Compare Greater Than Or Equal
> -def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcage : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Floating-point Absolute Compare Greater Than
> -def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;
> -def int_aarch64_neon_fcagt : Neon_Float_Cmp_Intrinsic;
> -
> -// Scalar Signed Saturating Accumulated of Unsigned Value
> -def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;
> -
> -// Scalar Unsigned Saturating Accumulated of Signed Value
> -def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
> -
> -// Scalar Absolute Value
> -def int_aarch64_neon_vabs :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Scalar Absolute Difference
> -def int_aarch64_neon_vabd :
> - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
> - [IntrNoMem]>;
> -
> -// Scalar Negate Value
> -def int_aarch64_neon_vneg :
> - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
> -
> -// Signed Saturating Doubling Multiply-Add Long
> -def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
> -
> -// Signed Saturating Doubling Multiply-Subtract Long
> -def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
> -
> -def int_aarch64_neon_vmull_p64 :
> - Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
> -
> -class Neon_2Arg_ShiftImm_Intrinsic
> - : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -class Neon_3Arg_ShiftImm_Intrinsic
> - : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
> - [IntrNoMem]>;
> -
> -// Scalar Shift Right (Immediate)
> -def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
> -def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Shift Right and Accumulate (Immediate)
> -def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
> -def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Rounding Shift Right and Accumulate (Immediate)
> -def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
> -def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Shift Left (Immediate)
> -def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;
> -
> -// Scalar Saturating Shift Left (Immediate)
> -def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
> -def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
> -
> -// Scalar Signed Saturating Shift Left Unsigned (Immediate)
> -def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;
> -
> -// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
> -def int_aarch64_neon_vcvtfxs2fp_n :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
> -def int_aarch64_neon_vcvtfxu2fp_n :
> - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
> -def int_aarch64_neon_vcvtfp2fxs_n :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
> -def int_aarch64_neon_vcvtfp2fxu_n :
> - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
> -
> -}
>
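> For context: a def in this file with TargetPrefix "aarch64" became an
> "llvm.aarch64.*" intrinsic in IR. A minimal sketch for the
> non-overloaded int_aarch64_neon_vmaxv above (callers now need the
> corresponding llvm.arm64.* intrinsic, where one exists):
>
>   declare float @llvm.aarch64.neon.vmaxv(<4 x float>)
>
>   define float @max_across(<4 x float> %v) {
>     %max = call float @llvm.aarch64.neon.vmaxv(<4 x float> %v)
>     ret float %max
>   }
>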
> Removed: llvm/trunk/lib/Target/AArch64/AArch64.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64.h (removed)
> @@ -1,46 +0,0 @@
> -//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the entry points for global functions defined in the LLVM
> -// AArch64 back-end.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_TARGET_AARCH64_H
> -#define LLVM_TARGET_AARCH64_H
> -
> -#include "MCTargetDesc/AArch64MCTargetDesc.h"
> -#include "llvm/Target/TargetMachine.h"
> -
> -namespace llvm {
> -
> -class AArch64AsmPrinter;
> -class FunctionPass;
> -class AArch64TargetMachine;
> -class MachineInstr;
> -class MCInst;
> -
> -FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
> - CodeGenOpt::Level OptLevel);
> -
> -FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
> -
> -FunctionPass *createAArch64BranchFixupPass();
> -
> -/// \brief Creates an AArch64-specific Target Transformation Info pass.
> -ImmutablePass *createAArch64TargetTransformInfoPass(
> - const AArch64TargetMachine *TM);
> -
> -void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
> - AArch64AsmPrinter &AP);
> -
> -
> -}
> -
> -#endif
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64.td (removed)
> @@ -1,83 +0,0 @@
> -//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This is the top level entry point for the AArch64 target.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -//===----------------------------------------------------------------------===//
> -// Target-independent interfaces
> -//===----------------------------------------------------------------------===//
> -
> -include "llvm/Target/Target.td"
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 Subtarget features.
> -//
> -
> -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
> - "Enable ARMv8 FP">;
> -
> -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
> - "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
> -
> -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
> - "Enable cryptographic instructions">;
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 Processors
> -//
> -
> -include "AArch64Schedule.td"
> -
> -class ProcNoItin<string Name, list<SubtargetFeature> Features>
> - : Processor<Name, NoItineraries, Features>;
> -
> -def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>;
> -
> -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
> - "Cortex-A53 ARM processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto]>;
> -
> -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
> - "Cortex-A57 ARM processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto]>;
> -
> -def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
> -def : Processor<"cortex-a57", NoItineraries, [ProcA57]>;
> -
> -//===----------------------------------------------------------------------===//
> -// Register File Description
> -//===----------------------------------------------------------------------===//
> -
> -include "AArch64RegisterInfo.td"
> -
> -include "AArch64CallingConv.td"
> -
> -//===----------------------------------------------------------------------===//
> -// Instruction Descriptions
> -//===----------------------------------------------------------------------===//
> -
> -include "AArch64InstrInfo.td"
> -
> -def AArch64InstrInfo : InstrInfo {
> - let noNamedPositionallyEncodedOperands = 1;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Declare the target which we are implementing
> -//===----------------------------------------------------------------------===//
> -
> -def AArch64 : Target {
> - let InstructionSet = AArch64InstrInfo;
> -}
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp (removed)
> @@ -1,303 +0,0 @@
> -//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains a printer that converts from our internal representation
> -// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64AsmPrinter.h"
> -#include "InstPrinter/AArch64InstPrinter.h"
> -#include "llvm/ADT/SmallString.h"
> -#include "llvm/CodeGen/MachineModuleInfoImpls.h"
> -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
> -#include "llvm/IR/DebugInfo.h"
> -#include "llvm/IR/Mangler.h"
> -#include "llvm/MC/MCAsmInfo.h"
> -#include "llvm/MC/MCInst.h"
> -#include "llvm/MC/MCSymbol.h"
> -#include "llvm/Support/TargetRegistry.h"
> -
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "asm-printer"
> -
> -/// Try to print a floating-point register as if it belonged to a specified
> -/// register-class. For example the inline asm operand modifier "b" requires its
> -/// argument to be printed as "bN".
> -static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
> - const TargetRegisterInfo *TRI,
> - char RegType, raw_ostream &O) {
> - if (!MO.isReg())
> - return true;
> -
> - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
> - if (AArch64::FPR8RegClass.contains(*AR)) {
> - O << RegType << TRI->getEncodingValue(MO.getReg());
> - return false;
> - }
> - }
> -
> - // The register doesn't correspond to anything floating-point like.
> - return true;
> -}
> -
> -/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
> -/// with the obvious type and an immediate 0 as either wzr or xzr.
> -static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
> - const TargetRegisterInfo *TRI,
> - const TargetRegisterClass &RegClass,
> - raw_ostream &O) {
> - char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
> -
> - if (MO.isImm() && MO.getImm() == 0) {
> - O << Prefix << "zr";
> - return false;
> - } else if (MO.isReg()) {
> - if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
> - O << (Prefix == 'x' ? "sp" : "wsp");
> - return false;
> - }
> -
> - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
> - if (RegClass.contains(*AR)) {
> - O << AArch64InstPrinter::getRegisterName(*AR);
> - return false;
> - }
> - }
> - }
> -
> - return true;
> -}
> -
> -bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
> - bool PrintImmediatePrefix,
> - StringRef Suffix, raw_ostream &O) {
> - StringRef Name;
> - StringRef Modifier;
> - switch (MO.getType()) {
> - default:
> - return true;
> - case MachineOperand::MO_GlobalAddress:
> - Name = getSymbol(MO.getGlobal())->getName();
> -
> - // Global variables may be accessed either via a GOT or in various fun and
> - // interesting TLS-model specific ways. Set the prefix modifier as
> - // appropriate here.
> - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
> - Reloc::Model RelocM = TM.getRelocationModel();
> - if (GV->isThreadLocal()) {
> - switch (TM.getTLSModel(GV)) {
> - case TLSModel::GeneralDynamic:
> - Modifier = "tlsdesc";
> - break;
> - case TLSModel::LocalDynamic:
> - Modifier = "dtprel";
> - break;
> - case TLSModel::InitialExec:
> - Modifier = "gottprel";
> - break;
> - case TLSModel::LocalExec:
> - Modifier = "tprel";
> - break;
> - }
> - } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
> - Modifier = "got";
> - }
> - }
> - break;
> - case MachineOperand::MO_BlockAddress:
> - Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
> - break;
> - case MachineOperand::MO_ConstantPoolIndex:
> - Name = GetCPISymbol(MO.getIndex())->getName();
> - break;
> - }
> -
> - // Some instructions (notably ADRP) don't take the # prefix for
> - // immediates. Only print it if asked to.
> - if (PrintImmediatePrefix)
> - O << '#';
> -
> - // Only need the joining "_" if both the prefix and the suffix are
> - // non-null. This little block simply takes care of the four possibly
> - // combinations involved there.
> - if (Modifier == "" && Suffix == "")
> - O << Name;
> - else if (Modifier == "" && Suffix != "")
> - O << ":" << Suffix << ':' << Name;
> - else if (Modifier != "" && Suffix == "")
> - O << ":" << Modifier << ':' << Name;
> - else
> - O << ":" << Modifier << '_' << Suffix << ':' << Name;
> -
> - return false;
> -}
> -
> -bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
> - unsigned AsmVariant,
> - const char *ExtraCode, raw_ostream &O) {
> - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
> -
> - if (!ExtraCode)
> - ExtraCode = "";
> -
> - switch(ExtraCode[0]) {
> - default:
> - if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
> - return false;
> - break;
> - case 'w':
> - // Output 32-bit general register operand, constant zero as wzr, or stack
> - // pointer as wsp. Ignored when used with other operand types.
> - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
> - AArch64::GPR32RegClass, O))
> - return false;
> - break;
> - case 'x':
> - // Output 64-bit general register operand, constant zero as xzr, or stack
> - // pointer as sp. Ignored when used with other operand types.
> - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
> - AArch64::GPR64RegClass, O))
> - return false;
> - break;
> - case 'H':
> - // Output higher numbered of a 64-bit general register pair
> - case 'Q':
> - // Output least significant register of a 64-bit general register pair
> - case 'R':
> - // Output most significant register of a 64-bit general register pair
> -
> - // FIXME note: these three operand modifiers will require, to some extent,
> - // adding a paired GPR64 register class. Initial investigation suggests that
> - // assertions are hit unless it has a type and is made legal for that type
> - // in ISelLowering. After that step is made, the number of modifications
> - // needed explodes (operation legality, calling conventions, stores, reg
> - // copies ...).
> - llvm_unreachable("FIXME: Unimplemented register pairs");
> - case 'b':
> - case 'h':
> - case 's':
> - case 'd':
> - case 'q':
> - if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
> - ExtraCode[0], O))
> - return false;
> - break;
> - case 'A':
> - // Output symbolic address with appropriate relocation modifier (also
> - // suitable for ADRP).
> - if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O))
> - return false;
> - break;
> - case 'L':
> - // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
> - // modifier.
> - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O))
> - return false;
> - break;
> - case 'G':
> - // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
> - // modifier (currently only for TLS local exec).
> - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O))
> - return false;
> - break;
> - case 'a':
> - return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
> - }
> -
> - // There's actually no operand modifier, which leads to a slightly eclectic
> - // set of behaviour which we have to handle here.
> - const MachineOperand &MO = MI->getOperand(OpNum);
> - switch (MO.getType()) {
> - default:
> - llvm_unreachable("Unexpected operand for inline assembly");
> - case MachineOperand::MO_Register:
> - // GCC prints the unmodified operand of a 'w' constraint as the vector
> - // register. Technically, we could allocate the argument as a VPR128, but
> - // that leads to extremely dodgy copies being generated to get the data
> - // there.
> - if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
> - O << AArch64InstPrinter::getRegisterName(MO.getReg());
> - break;
> - case MachineOperand::MO_Immediate:
> - O << '#' << MO.getImm();
> - break;
> - case MachineOperand::MO_FPImmediate:
> - assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
> - O << "#0.0";
> - break;
> - case MachineOperand::MO_BlockAddress:
> - case MachineOperand::MO_ConstantPoolIndex:
> - case MachineOperand::MO_GlobalAddress:
> - return printSymbolicAddress(MO, false, "", O);
> - }
> -
> - return false;
> -}
> -
> -bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
> - unsigned OpNum,
> - unsigned AsmVariant,
> - const char *ExtraCode,
> - raw_ostream &O) {
> - // Currently both the memory constraints (m and Q) behave the same and amount
> - // to the address as a single register. In future, we may allow "m" to provide
> - // both a base and an offset.
> - const MachineOperand &MO = MI->getOperand(OpNum);
> - assert(MO.isReg() && "unexpected inline assembly memory operand");
> - O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
> - return false;
> -}
> -
> -#include "AArch64GenMCPseudoLowering.inc"
> -
> -void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
> - // Do any auto-generated pseudo lowerings.
> - if (emitPseudoExpansionLowering(OutStreamer, MI))
> - return;
> -
> - MCInst TmpInst;
> - LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
> - EmitToStreamer(OutStreamer, TmpInst);
> -}
> -
> -void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
> - if (Subtarget->isTargetELF()) {
> - const TargetLoweringObjectFileELF &TLOFELF =
> - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
> -
> - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
> -
> - // Output stubs for external and common global variables.
> - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
> - if (!Stubs.empty()) {
> - OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
> - const DataLayout *TD = TM.getDataLayout();
> -
> - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
> - OutStreamer.EmitLabel(Stubs[i].first);
> - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
> - TD->getPointerSize(0));
> - }
> - Stubs.clear();
> - }
> - }
> -}
> -
> -bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
> - return AsmPrinter::runOnMachineFunction(MF);
> -}
> -
> -// Force static initialization.
> -extern "C" void LLVMInitializeAArch64AsmPrinter() {
> - RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
> - RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
> -}
> -
>
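As an aside for anyone reading through the removal: the 'w' and 'x' cases above implement the usual GCC-style inline-asm operand modifiers for AArch64, which the ARM64 backend is expected to keep handling identically. A minimal sketch of how they surface in user code (illustrative only, not part of this patch):

    // '%w1' prints the 32-bit name (w-register) of the operand's register,
    // '%x1' the 64-bit name (x-register); zero prints as wzr/xzr as noted above.
    int add_one(int a) {
      int r;
      __asm__("add %w0, %w1, #1" : "=r"(r) : "r"(a));
      return r;
    }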
> Removed: llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.h (removed)
> @@ -1,76 +0,0 @@
> -// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines the AArch64 assembly printer class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_AARCH64ASMPRINTER_H
> -#define LLVM_AARCH64ASMPRINTER_H
> -
> -#include "AArch64.h"
> -#include "AArch64TargetMachine.h"
> -#include "llvm/CodeGen/AsmPrinter.h"
> -#include "llvm/MC/MCStreamer.h"
> -#include "llvm/Support/Compiler.h"
> -
> -namespace llvm {
> -
> -class MCOperand;
> -
> -class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
> -
> - /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
> - /// make the right decision when printing asm code for different targets.
> - const AArch64Subtarget *Subtarget;
> -
> - // emitPseudoExpansionLowering - tblgen'erated.
> - bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
> - const MachineInstr *MI);
> -
> - public:
> - explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
> - : AsmPrinter(TM, Streamer) {
> - Subtarget = &TM.getSubtarget<AArch64Subtarget>();
> - }
> -
> - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
> -
> - MCOperand lowerSymbolOperand(const MachineOperand &MO,
> - const MCSymbol *Sym) const;
> -
> - void EmitInstruction(const MachineInstr *MI) override;
> - void EmitEndOfAsmFile(Module &M) override;
> -
> - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
> - unsigned AsmVariant, const char *ExtraCode,
> - raw_ostream &O) override;
> - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
> - unsigned AsmVariant, const char *ExtraCode,
> - raw_ostream &O) override;
> -
> - /// printSymbolicAddress - Given some kind of reasonably bare symbolic
> - /// reference, print out the appropriate asm string to represent it. If
> - /// appropriate, a relocation-specifier will be produced, composed of a
> - /// general class derived from the MO parameter and an instruction-specific
> - /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
> - /// given.
> - bool printSymbolicAddress(const MachineOperand &MO,
> - bool PrintImmediatePrefix,
> - StringRef Suffix, raw_ostream &O);
> -
> - const char *getPassName() const override {
> - return "AArch64 Assembly Printer";
> - }
> -
> - bool runOnMachineFunction(MachineFunction &MF) override;
> -};
> -} // end namespace llvm
> -
> -#endif
>
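The relocation specifiers that printSymbolicAddress glues together are the standard AArch64 ELF ones. For a rough idea of the output the 'A' and 'L' modifiers were producing (assuming a global called `var`; illustrative only):

    adrp x0, var              // 'A': page of var; note no '#' prefix for ADRP
    add  x0, x0, #:lo12:var   // 'L': bits 11:0, printed as #:lo12:var
    // With a "got" modifier the combined specifier appears:
    adrp x0, :got:var
    ldr  x0, [x0, #:got_lo12:var]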
> Removed: llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64BranchFixupPass.cpp (removed)
> @@ -1,601 +0,0 @@
> -//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains a pass that fixes AArch64 branches which have ended up out
> -// of range for their immediate operands.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64InstrInfo.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/ADT/Statistic.h"
> -#include "llvm/CodeGen/MachineFunctionPass.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/Support/Debug.h"
> -#include "llvm/Support/Format.h"
> -#include "llvm/Support/raw_ostream.h"
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "aarch64-branch-fixup"
> -
> -STATISTIC(NumSplit, "Number of uncond branches inserted");
> -STATISTIC(NumCBrFixed, "Number of cond branches fixed");
> -
> -/// Return the worst case padding that could result from unknown offset bits.
> -/// This does not include alignment padding caused by known offset bits.
> -///
> -/// @param LogAlign log2(alignment)
> -/// @param KnownBits Number of known low offset bits.
> -static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
> - if (KnownBits < LogAlign)
> - return (1u << LogAlign) - (1u << KnownBits);
> - return 0;
> -}
> -
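A quick worked example, since the formula is terse: with a successor needing 8-byte alignment (LogAlign = 3) but only 4-byte instruction alignment known (KnownBits = 2), the worst case is (1 << 3) - (1 << 2) = 4 bytes of padding; once KnownBits >= LogAlign the offset is provably aligned and nothing is added. A compile-time restatement (constexpr mirror of the function above, illustrative only):

    constexpr unsigned unknownPaddingMirror(unsigned LogAlign, unsigned KnownBits) {
      return KnownBits < LogAlign ? (1u << LogAlign) - (1u << KnownBits) : 0;
    }
    static_assert(unknownPaddingMirror(3, 2) == 4, "8-byte align, 4-byte known");
    static_assert(unknownPaddingMirror(3, 3) == 0, "already provably aligned");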
> -namespace {
> - /// Due to limited PC-relative displacements, conditional branches to distant
> - /// blocks may need converting into an unconditional equivalent. For example:
> - /// tbz w1, #0, far_away
> - /// becomes
> - /// tbnz w1, #0, skip
> - /// b far_away
> - /// skip:
> - class AArch64BranchFixup : public MachineFunctionPass {
> - /// Information about the offset and size of a single basic block.
> - struct BasicBlockInfo {
> - /// Distance from the beginning of the function to the beginning of this
> - /// basic block.
> - ///
> - /// Offsets are computed assuming worst case padding before an aligned
> - /// block. This means that subtracting basic block offsets always gives a
> - /// conservative estimate of the real distance which may be smaller.
> - ///
> - /// Because worst case padding is used, the computed offset of an aligned
> - /// block may not actually be aligned.
> - unsigned Offset;
> -
> - /// Size of the basic block in bytes. If the block contains inline
> - /// assembly, this is a worst case estimate.
> - ///
> - /// The size does not include any alignment padding whether from the
> - /// beginning of the block, or from an aligned jump table at the end.
> - unsigned Size;
> -
> - /// The number of low bits in Offset that are known to be exact. The
> - /// remaining bits of Offset are an upper bound.
> - uint8_t KnownBits;
> -
> - /// When non-zero, the block contains instructions (inline asm) of unknown
> - /// size. The real size may be smaller than Size bytes by a multiple of 1
> - /// << Unalign.
> - uint8_t Unalign;
> -
> - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
> -
> - /// Compute the number of known offset bits internally to this block.
> - /// This number should be used to predict worst case padding when
> - /// splitting the block.
> - unsigned internalKnownBits() const {
> - unsigned Bits = Unalign ? Unalign : KnownBits;
> - // If the block size isn't a multiple of the known bits, assume the
> - // worst case padding.
> - if (Size & ((1u << Bits) - 1))
> - Bits = countTrailingZeros(Size);
> - return Bits;
> - }
> -
> - /// Compute the offset immediately following this block. If LogAlign is
> - /// specified, return the offset the successor block will get if it has
> - /// this alignment.
> - unsigned postOffset(unsigned LogAlign = 0) const {
> - unsigned PO = Offset + Size;
> - if (!LogAlign)
> - return PO;
> - // Add alignment padding from the terminator.
> - return PO + UnknownPadding(LogAlign, internalKnownBits());
> - }
> -
> - /// Compute the number of known low bits of postOffset. If this block
> - /// contains inline asm, the number of known bits drops to the
> - /// instruction alignment. An aligned terminator may increase the number
> - /// of known bits.
> - /// If LogAlign is given, also consider the alignment of the next block.
> - unsigned postKnownBits(unsigned LogAlign = 0) const {
> - return std::max(LogAlign, internalKnownBits());
> - }
> - };
> -
> - std::vector<BasicBlockInfo> BBInfo;
> -
> - /// One per immediate branch: the machine instruction pointer, the number of
> - /// displacement bits available (OffsetBits), and whether the branch is
> - /// conditional (IsCond).
> - struct ImmBranch {
> - MachineInstr *MI;
> - unsigned OffsetBits : 31;
> - bool IsCond : 1;
> - ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
> - : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
> - };
> -
> - /// Keep track of all the immediate branch instructions.
> - ///
> - std::vector<ImmBranch> ImmBranches;
> -
> - MachineFunction *MF;
> - const AArch64InstrInfo *TII;
> - public:
> - static char ID;
> - AArch64BranchFixup() : MachineFunctionPass(ID) {}
> -
> - bool runOnMachineFunction(MachineFunction &MF) override;
> -
> - const char *getPassName() const override {
> - return "AArch64 branch fixup pass";
> - }
> -
> - private:
> - void initializeFunctionInfo();
> - MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
> - void adjustBBOffsetsAfter(MachineBasicBlock *BB);
> - bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned OffsetBits);
> - bool fixupImmediateBr(ImmBranch &Br);
> - bool fixupConditionalBr(ImmBranch &Br);
> -
> - void computeBlockSize(MachineBasicBlock *MBB);
> - unsigned getOffsetOf(MachineInstr *MI) const;
> - void dumpBBs();
> - void verify();
> - };
> - char AArch64BranchFixup::ID = 0;
> -}
> -
> -/// Check that block offsets are consistent: each block must begin at or
> -/// after the previous block's postOffset.
> -void AArch64BranchFixup::verify() {
> -#ifndef NDEBUG
> - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
> - MBBI != E; ++MBBI) {
> - MachineBasicBlock *MBB = MBBI;
> - unsigned MBBId = MBB->getNumber();
> - assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
> - }
> -#endif
> -}
> -
> -/// Print block size and offset information (debugging aid).
> -void AArch64BranchFixup::dumpBBs() {
> - DEBUG({
> - for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
> - const BasicBlockInfo &BBI = BBInfo[J];
> - dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
> - << " kb=" << unsigned(BBI.KnownBits)
> - << " ua=" << unsigned(BBI.Unalign)
> - << format(" size=%#x\n", BBInfo[J].Size);
> - }
> - });
> -}
> -
> -/// Returns an instance of the branch fixup pass.
> -FunctionPass *llvm::createAArch64BranchFixupPass() {
> - return new AArch64BranchFixup();
> -}
> -
> -bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
> - MF = &mf;
> - DEBUG(dbgs() << "***** AArch64BranchFixup ******");
> - TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
> -
> - // This pass invalidates liveness information when it splits basic blocks.
> - MF->getRegInfo().invalidateLiveness();
> -
> - // Renumber all of the machine basic blocks in the function, guaranteeing that
> - // the numbers agree with the position of the block in the function.
> - MF->RenumberBlocks();
> -
> - // Do the initial scan of the function, building up information about the
> - // sizes of each block and location of each immediate branch.
> - initializeFunctionInfo();
> -
> - // Iteratively fix up branches until there is no change.
> - unsigned NoBRIters = 0;
> - bool MadeChange = false;
> - while (true) {
> - DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
> - bool BRChange = false;
> - for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
> - BRChange |= fixupImmediateBr(ImmBranches[i]);
> - if (BRChange && ++NoBRIters > 30)
> - report_fatal_error("Branch Fix Up pass failed to converge!");
> - DEBUG(dumpBBs());
> -
> - if (!BRChange)
> - break;
> - MadeChange = true;
> - }
> -
> - // After a while, this might be made debug-only, but it is not expensive.
> - verify();
> -
> - DEBUG(dbgs() << '\n'; dumpBBs());
> -
> - BBInfo.clear();
> - ImmBranches.clear();
> -
> - return MadeChange;
> -}
> -
> -/// Return true if the specified basic block can fall through into the block
> -/// immediately after it.
> -static bool BBHasFallthrough(MachineBasicBlock *MBB) {
> - // Get the next machine basic block in the function.
> - MachineFunction::iterator MBBI = MBB;
> - // Can't fall off end of function.
> - if (std::next(MBBI) == MBB->getParent()->end())
> - return false;
> -
> - MachineBasicBlock *NextBB = std::next(MBBI);
> - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
> - E = MBB->succ_end(); I != E; ++I)
> - if (*I == NextBB)
> - return true;
> -
> - return false;
> -}
> -
> -/// Do the initial scan of the function, building up information about the
> -/// size of each block and the location of each immediate branch.
> -void AArch64BranchFixup::initializeFunctionInfo() {
> - BBInfo.clear();
> - BBInfo.resize(MF->getNumBlockIDs());
> -
> - // First thing, compute the size of all basic blocks, and see if the function
> - // has any inline assembly in it. If so, we have to be conservative about
> - // alignment assumptions, as we don't know for sure the size of any
> - // instructions in the inline assembly.
> - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
> - computeBlockSize(I);
> -
> - // The known bits of the entry block offset are determined by the function
> - // alignment.
> - BBInfo.front().KnownBits = MF->getAlignment();
> -
> - // Compute block offsets and known bits.
> - adjustBBOffsetsAfter(MF->begin());
> -
> - // Now go back through the instructions and build up our data structures.
> - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
> - MBBI != E; ++MBBI) {
> - MachineBasicBlock &MBB = *MBBI;
> -
> - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> - I != E; ++I) {
> - if (I->isDebugValue())
> - continue;
> -
> - int Opc = I->getOpcode();
> - if (I->isBranch()) {
> - bool IsCond = false;
> -
> - // The offsets encoded in instructions here scale by the instruction
> - // size (4 bytes), effectively increasing their range by 2 bits.
> - unsigned Bits = 0;
> - switch (Opc) {
> - default:
> - continue; // Ignore other JT branches
> - case AArch64::TBZxii:
> - case AArch64::TBZwii:
> - case AArch64::TBNZxii:
> - case AArch64::TBNZwii:
> - IsCond = true;
> - Bits = 14 + 2;
> - break;
> - case AArch64::Bcc:
> - case AArch64::CBZx:
> - case AArch64::CBZw:
> - case AArch64::CBNZx:
> - case AArch64::CBNZw:
> - IsCond = true;
> - Bits = 19 + 2;
> - break;
> - case AArch64::Bimm:
> - Bits = 26 + 2;
> - break;
> - }
> -
> - // Record this immediate branch.
> - ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
> - }
> - }
> - }
> -}
> -
> -/// Compute the size and some alignment information for MBB. This function
> -/// updates BBInfo directly.
> -void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
> - BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
> - BBI.Size = 0;
> - BBI.Unalign = 0;
> -
> - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
> - ++I) {
> - BBI.Size += TII->getInstSizeInBytes(*I);
> - // For inline asm, getInstSizeInBytes returns a conservative estimate.
> - // The actual size may be smaller, but still a multiple of the instr size.
> - if (I->isInlineAsm())
> - BBI.Unalign = 2;
> - }
> -}
> -
> -/// Return the current offset of the specified machine instruction from the
> -/// start of the function. This offset changes as blocks are split, resized or
> -/// moved within the function.
> -unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
> - MachineBasicBlock *MBB = MI->getParent();
> -
> - // The offset is composed of two things: the sum of the sizes of all MBBs
> - // before this instruction's block, and the offset from the start of the block
> - // it is in.
> - unsigned Offset = BBInfo[MBB->getNumber()].Offset;
> -
> - // Sum instructions before MI in MBB.
> - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
> - assert(I != MBB->end() && "Didn't find MI in its own basic block?");
> - Offset += TII->getInstSizeInBytes(*I);
> - }
> - return Offset;
> -}
> -
> -/// Split the basic block containing MI into two blocks, which are joined by
> -/// an unconditional branch. Update data structures and renumber blocks to
> -/// account for this change and returns the newly created block.
> -MachineBasicBlock *
> -AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
> - MachineBasicBlock *OrigBB = MI->getParent();
> -
> - // Create a new MBB for the code after the OrigBB.
> - MachineBasicBlock *NewBB =
> - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
> - MachineFunction::iterator MBBI = OrigBB; ++MBBI;
> - MF->insert(MBBI, NewBB);
> -
> - // Splice the instructions starting with MI over to NewBB.
> - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
> -
> - // Add an unconditional branch from OrigBB to NewBB.
> - // Note the new unconditional branch is not being recorded.
> - // There doesn't seem to be meaningful DebugInfo available; this doesn't
> - // correspond to anything in the source.
> - BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
> - ++NumSplit;
> -
> - // Update the CFG. All succs of OrigBB are now succs of NewBB.
> - NewBB->transferSuccessors(OrigBB);
> -
> - // OrigBB branches to NewBB.
> - OrigBB->addSuccessor(NewBB);
> -
> - // Update internal data structures to account for the newly inserted MBB.
> - MF->RenumberBlocks(NewBB);
> -
> - // Insert an entry into BBInfo to align it properly with the (newly
> - // renumbered) block numbers.
> - BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
> -
> - // Figure out how large the OrigBB is. As the first half of the original
> - // block, it cannot contain a tablejump. The size includes
> - // the new jump we added. (It should be possible to do this without
> - // recounting everything, but it's very confusing, and this is rarely
> - // executed.)
> - computeBlockSize(OrigBB);
> -
> - // Figure out how large the NewBB is. As the second half of the original
> - // block, it may contain a tablejump.
> - computeBlockSize(NewBB);
> -
> - // All BBOffsets following these blocks must be modified.
> - adjustBBOffsetsAfter(OrigBB);
> -
> - return NewBB;
> -}
> -
> -void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
> - unsigned BBNum = BB->getNumber();
> - for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
> - // Get the offset and known bits at the end of the layout predecessor.
> - // Include the alignment of the current block.
> - unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
> - unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
> - unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
> -
> - // This is where block i begins. Stop if the offset is already correct,
> - // and we have updated 2 blocks. This is the maximum number of blocks
> - // changed before calling this function.
> - if (i > BBNum + 2 &&
> - BBInfo[i].Offset == Offset &&
> - BBInfo[i].KnownBits == KnownBits)
> - break;
> -
> - BBInfo[i].Offset = Offset;
> - BBInfo[i].KnownBits = KnownBits;
> - }
> -}
> -
> -/// Returns true if the distance between the specified MI and the specified BB
> -/// can fit in MI's displacement field.
> -bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
> - MachineBasicBlock *DestBB,
> - unsigned OffsetBits) {
> - int64_t BrOffset = getOffsetOf(MI);
> - int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
> -
> - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
> - << " from BB#" << MI->getParent()->getNumber()
> - << " bits available=" << OffsetBits
> - << " from " << getOffsetOf(MI) << " to " << DestOffset
> - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
> -
> - return isIntN(OffsetBits, DestOffset - BrOffset);
> -}
> -
> -/// Fix up an immediate branch whose destination is too far away to fit in its
> -/// displacement field.
> -bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
> - MachineInstr *MI = Br.MI;
> - MachineBasicBlock *DestBB = nullptr;
> - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
> - if (MI->getOperand(i).isMBB()) {
> - DestBB = MI->getOperand(i).getMBB();
> - break;
> - }
> - }
> - assert(DestBB && "Branch with no destination BB?");
> -
> - // Check to see if the DestBB is already in-range.
> - if (isBBInRange(MI, DestBB, Br.OffsetBits))
> - return false;
> -
> - assert(Br.IsCond && "Only conditional branches should need fixup");
> - return fixupConditionalBr(Br);
> -}
> -
> -/// Fix up a conditional branch whose destination is too far away to fit in its
> -/// displacement field. It is converted to an inverse conditional branch + an
> -/// unconditional branch to the destination.
> -bool
> -AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
> - MachineInstr *MI = Br.MI;
> - MachineBasicBlock *MBB = MI->getParent();
> - unsigned CondBrMBBOperand = 0;
> -
> - // The general idea is to add an unconditional branch to the destination and
> - // invert the conditional branch to jump over it. Complications occur around
> - // fallthrough and unreachable ends to the block.
> - // b.lt L1
> - // =>
> - // b.ge L2
> - // b L1
> - // L2:
> -
> - // First we invert the conditional branch, by creating a replacement if
> - // necessary. This if statement contains all the special handling of different
> - // branch types.
> - if (MI->getOpcode() == AArch64::Bcc) {
> - // The basic block is operand number 1 for Bcc
> - CondBrMBBOperand = 1;
> -
> - A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
> - CC = A64InvertCondCode(CC);
> - MI->getOperand(0).setImm(CC);
> - } else {
> - MachineInstrBuilder InvertedMI;
> - int InvertedOpcode;
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("Unknown branch type");
> - case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
> - case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
> - case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
> - case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
> - case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
> - case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
> - case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
> - case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
> - }
> -
> - InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
> - for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
> - InvertedMI.addOperand(MI->getOperand(i));
> - if (MI->getOperand(i).isMBB())
> - CondBrMBBOperand = i;
> - }
> -
> - MI->eraseFromParent();
> - MI = Br.MI = InvertedMI;
> - }
> -
> - // If the branch is at the end of its MBB and that has a fall-through block,
> - // direct the updated conditional branch to the fall-through
> - // block. Otherwise, split the MBB before the next instruction.
> - MachineInstr *BMI = &MBB->back();
> - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
> -
> - ++NumCBrFixed;
> - if (BMI != MI) {
> - if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
> - BMI->getOpcode() == AArch64::Bimm) {
> - // Last MI in the BB is an unconditional branch. We can swap destinations:
> - // b.eq L1 (temporarily b.ne L1 after first change)
> - // b L2
> - // =>
> - // b.ne L2
> - // b L1
> - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
> - if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
> - DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
> - << *BMI);
> - MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
> - BMI->getOperand(0).setMBB(DestBB);
> - MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
> - return true;
> - }
> - }
> - }
> -
> - if (NeedSplit) {
> - MachineBasicBlock::iterator MBBI = MI; ++MBBI;
> - splitBlockBeforeInstr(MBBI);
> - // No need for the branch to the next block. We're adding an unconditional
> - // branch to the destination.
> - int delta = TII->getInstSizeInBytes(MBB->back());
> - BBInfo[MBB->getNumber()].Size -= delta;
> - MBB->back().eraseFromParent();
> - // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
> - }
> -
> - // After splitting and removing the unconditional branch from the original BB,
> - // the structure is now:
> - // oldbb:
> - // [things]
> - // b.invertedCC L1
> - // splitbb/fallthroughbb:
> - // [old b L2/real continuation]
> - //
> - // We now have to change the conditional branch to point to splitbb and add an
> - // unconditional branch after it to L1, giving the final structure:
> - // oldbb:
> - // [things]
> - // b.invertedCC splitbb
> - // b L1
> - // splitbb/fallthroughbb:
> - // [old b L2/real continuation]
> - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
> -
> - DEBUG(dbgs() << " Insert B to BB#"
> - << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
> - << " also invert condition and change dest. to BB#"
> - << NextBB->getNumber() << "\n");
> -
> - // Insert a new unconditional branch and fixup the destination of the
> - // conditional one. Also update the ImmBranch as well as adding a new entry
> - // for the new branch.
> - BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
> - .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
> - MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
> -
> - BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
> -
> - // 26 bits written down in Bimm, specifying a multiple of 4.
> - unsigned OffsetBits = 26 + 2;
> - ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
> -
> - adjustBBOffsetsAfter(MBB);
> - return true;
> -}
>
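For reference, the OffsetBits values recorded by this pass translate into the following signed byte reaches; isIntN(N, x) accepts x in [-2^(N-1), 2^(N-1)). A self-contained sanity check (illustrative only):

    #include <cstdint>
    // Half-range of an N-bit signed, instruction-scaled displacement, in bytes.
    constexpr int64_t reach(unsigned OffsetBits) {
      return INT64_C(1) << (OffsetBits - 1);
    }
    static_assert(reach(14 + 2) == 32 * 1024,         "tbz/tbnz: +/-32 KiB");
    static_assert(reach(19 + 2) == 1024 * 1024,       "b.cc/cbz/cbnz: +/-1 MiB");
    static_assert(reach(26 + 2) == 128 * 1024 * 1024, "b: +/-128 MiB");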
> Removed: llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64CallingConv.td (removed)
> @@ -1,197 +0,0 @@
> -//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-==//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -// This describes the calling conventions for the AArch64 architecture.
> -//===----------------------------------------------------------------------===//
> -
> -
> -// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
> -// higher level of abstraction than LLVM's target interface presents. In
> -// particular, it refers (like other ABIs, in fact) directly to
> -// structs. However, generic LLVM code takes the liberty of lowering structure
> -// arguments to the component fields before we see them.
> -//
> -// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
> -// implemented, so the goals of this calling convention are, in decreasing
> -// priority order:
> -// 1. Expose *some* way to express the concepts required to implement the
> -// generic PCS from a front-end.
> -// 2. Provide a sane ABI for pure LLVM.
> -// 3. Follow the generic PCS as closely as is naturally possible.
> -//
> -// The suggested front-end implementation of PCS features is:
> -// * Integer, float and vector arguments of all sizes which end up in
> -// registers are passed and returned via the natural LLVM type.
> -// * Structure arguments with size <= 16 bytes are passed and returned in
> -// registers as similar integer or composite types. For example:
> -// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
> -// * HFAs in registers follow rules similar to small structs: appropriate
> -// composite types.
> -// * Structure arguments with size > 16 bytes are passed via a pointer,
> -// handled completely by the front-end.
> -// * Structure return values > 16 bytes via an sret pointer argument.
> -// * Other stack-based arguments (not large structs) are passed using byval
> -// pointers. Padding arguments are added beforehand to guarantee a large
> -// struct doesn't later use integer registers.
> -//
> -// N.b. this means that it is the front-end's responsibility (if it cares about
> -// PCS compliance) to check whether enough registers are available for an
> -// argument when deciding how to pass it.
> -
> -class CCIfAlign<int Align, CCAction A>:
> - CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
> -
> -def CC_A64_APCS : CallingConv<[
> - // SRet is an LLVM-specific concept, so it takes precedence over general ABI
> - // concerns. However, this rule will be used by C/C++ frontends to implement
> - // structure return.
> - CCIfSRet<CCAssignToReg<[X8]>>,
> -
> - // Put ByVal arguments directly on the stack. Minimum size and alignment of a
> - // slot is 64-bit.
> - CCIfByVal<CCPassByVal<8, 8>>,
> -
> - // Canonicalise the various types that live in different floating-point
> - // registers. This makes sense because the PCS does not distinguish Short
> - // Vectors and Floating-point types.
> - CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
> - CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType<f32>>,
> - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
> - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
> - CCBitConvertToType<f128>>,
> -
> - // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
> - // Floating-point or Short Vector Type and the NSRN is less than 8, then the
> - // argument is allocated to the least significant bits of register
> - // v[NSRN]. The NSRN is incremented by one. The argument has now been
> - // allocated."
> - CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
> - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
> - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
> - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
> - CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
> -
> - // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
> - // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
> - // argument is allocated to SIMD and Floating-point registers (with one
> - // register per element of the HFA). The NSRN is incremented by the number of
> - // registers used. The argument has now been allocated."
> - //
> - // N.b. As above, this rule is the responsibility of the front-end.
> -
> - // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
> - // the argument is rounded up to the nearest multiple of 8 bytes."
> - //
> - // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
> - // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
> - // Alignment of the Argument's type."
> - //
> - // It is expected that these will be satisfied by adding dummy arguments to
> - // the prototype.
> -
> - // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
> - // type then the size of the argument is set to 8 bytes. The effect is as if
> - // the argument had been copied to the least significant bits of a 64-bit
> - // register and the remaining bits filled with unspecified values."
> - CCIfType<[f16, f32], CCPromoteToType<f64>>,
> -
> - // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
> - // precision Floating-point or Short Vector Type, then the argument is copied
> - // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
> - // argument. The argument has now been allocated."
> - CCIfType<[f64], CCAssignToStack<8, 8>>,
> - CCIfType<[f128], CCAssignToStack<16, 16>>,
> -
> - // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
> - // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
> - // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
> - // one. The argument has now been allocated."
> -
> - // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
> - // represented as two i64s, the first one being split. If we delayed this
> - // operation C.8 would never be reached.
> - CCIfType<[i64],
> - CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
> -
> - // Note: the promotion also implements C.14.
> - CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
> -
> - // And now the real implementation of C.7
> - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
> -
> - // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
> - // up to the next even number."
> - //
> - // "C.9: If the argument is an Integral Type, the size of the argument is
> - // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
> - // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
> - // memory representation of the argument. The NGRN is incremented by two. The
> - // argument has now been allocated."
> - //
> - // Subtlety here: what if alignment is 16 but it is not an integral type? All
> - // floating-point types have been allocated already, which leaves composite
> - // types: this is why a front-end may need to produce i128 for a struct <= 16
> - // bytes.
> -
> - // PCS: "C.10 If the argument is a Composite Type and the size in double-words
> - // of the argument is not more than 8 minus NGRN, then the argument is copied
> - // into consecutive general-purpose registers, starting at x[NGRN]. The
> - // argument is passed as though it had been loaded into the registers from a
> - // double-word aligned address with an appropriate sequence of LDR
> - // instructions loading consecutive registers from memory (the contents of any
> - // unused parts of the registers are unspecified by this standard). The NGRN
> - // is incremented by the number of registers used. The argument has now been
> - // allocated."
> - //
> - // Another one that's the responsibility of the front-end (sigh).
> -
> - // PCS: "C.11: The NGRN is set to 8."
> - CCCustom<"CC_AArch64NoMoreRegs">,
> -
> - // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
> - // Alignment of the argument's type."
> - //
> - // PCS: "C.13: If the argument is a composite type then the argument is copied
> - // to memory at the adjusted NSAA. The NSAA is incremented by the size of
> - // the argument. The argument has now been allocated."
> - //
> - // Note that the effect of this corresponds to a memcpy rather than register
> - // stores so that the struct ends up correctly addressable at the adjusted
> - // NSAA.
> -
> - // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
> - // of the argument is set to 8 bytes. The effect is as if the argument was
> - // copied to the least significant bits of a 64-bit register and the remaining
> - // bits filled with unspecified values."
> - //
> - // Integer types were widened above. Floating-point and composite types have
> - // already been allocated completely. Nothing to do.
> -
> - // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
> - // is incremented by the size of the argument. The argument has now been
> - // allocated."
> - CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
> - CCIfType<[i64], CCAssignToStack<8, 8>>
> -
> -]>;
> -
> -// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
> -// of vector registers (8-15) are callee-saved. The order here is picked up
> -// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
> -// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
> -// [sp-16], ...
> -def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
> - (sequence "D%u", 15, 8))>;
> -
> -
> -// TLS descriptor calls are extremely restricted in their changes, to allow
> -// optimisations in the (hopefully) more common fast path where no real action
> -// is needed. They actually have to preserve all registers, except for the
> -// unavoidable X30 and the return register X0.
> -def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
> - (sequence "Q%u", 31, 0))>;
>
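The C.8/C.9 even-register rule that the CCIfSplit line implements is easiest to see from the source level. Under the PCS a 16-byte-aligned integral argument starts at an even-numbered GPR, so in the sketch below `a` lands in x0, x1 is skipped, and `b` occupies x2/x3 (hypothetical example, not from this patch):

    __int128 f(long a, __int128 b) {
      return b + (__int128)a;  // a: x0; b: x2 (low), x3 (high); x1 unused
    }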
> Removed: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (removed)
> @@ -1,626 +0,0 @@
> -//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the AArch64 implementation of the TargetFrameLowering
> -// class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64FrameLowering.h"
> -#include "AArch64InstrInfo.h"
> -#include "AArch64MachineFunctionInfo.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -#include "llvm/CodeGen/MachineFunction.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineMemOperand.h"
> -#include "llvm/CodeGen/MachineModuleInfo.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/CodeGen/RegisterScavenging.h"
> -#include "llvm/IR/Function.h"
> -#include "llvm/MC/MachineLocation.h"
> -#include "llvm/Support/Debug.h"
> -#include "llvm/Support/ErrorHandling.h"
> -
> -using namespace llvm;
> -
> -void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
> - uint64_t &Initial,
> - uint64_t &Residual) const {
> - // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
> - // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
> - // 0x1f8, but stack adjustment should always be a multiple of 16.
> - if (Total <= 0x1f0) {
> - Initial = Total;
> - Residual = 0;
> - } else {
> - Initial = 0x1f0;
> - Residual = Total - Initial;
> - }
> -}
> -
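Concretely: a 0x200-byte frame is split as Initial = 0x1f0 and Residual = 0x10, keeping the callee-save STP/LDP offsets within their 7-bit scaled immediates, while anything up to 0x1f0 is handled in a single adjustment. A constexpr restatement of the split (mirror of the logic above, illustrative only):

    constexpr unsigned long initialAdjust(unsigned long Total) {
      return Total <= 0x1f0 ? Total : 0x1f0;
    }
    static_assert(initialAdjust(0x180) == 0x180, "small frame: one adjustment");
    static_assert(initialAdjust(0x200) == 0x1f0, "large frame: split at 0x1f0");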
> -void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
> - AArch64MachineFunctionInfo *FuncInfo =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> - MachineBasicBlock &MBB = MF.front();
> - MachineBasicBlock::iterator MBBI = MBB.begin();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
> -
> - MachineModuleInfo &MMI = MF.getMMI();
> - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
> - bool NeedsFrameMoves = MMI.hasDebugInfo()
> - || MF.getFunction()->needsUnwindTableEntry();
> -
> - uint64_t NumInitialBytes, NumResidualBytes;
> -
> - // Currently we expect the stack to be laid out by
> - // sub sp, sp, #initial
> - // stp x29, x30, [sp, #offset]
> - // ...
> - // str xxx, [sp, #offset]
> - // sub sp, sp, #rest (possibly via extra instructions).
> - if (MFI->getCalleeSavedInfo().size()) {
> - // If there are callee-saved registers, we want to store them efficiently as
> - // a block, and virtual base assignment happens too early to do it for us, so
> - // we adjust the stack in two phases: first just for callee-saved fiddling,
> - // then to allocate the rest of the frame.
> - splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
> - } else {
> - // If there aren't any callee-saved registers, two-phase adjustment is
> - // inefficient. It's more efficient to adjust with NumInitialBytes too
> - // because when we're in a "callee pops argument space" situation, that pop
> - // must be tacked onto Initial for correctness.
> - NumInitialBytes = MFI->getStackSize();
> - NumResidualBytes = 0;
> - }
> -
> - // Tell everyone else how much adjustment we're expecting them to use. In
> - // particular if an adjustment is required for a tail call the epilogue could
> - // have a different view of things.
> - FuncInfo->setInitialStackAdjust(NumInitialBytes);
> -
> - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
> - MachineInstr::FrameSetup);
> -
> - if (NeedsFrameMoves && NumInitialBytes) {
> - // We emit this update even if the CFA is set from a frame pointer later so
> - // that the CFA is valid in the interim.
> - MachineLocation Dst(MachineLocation::VirtualFP);
> - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
> - unsigned CFIIndex = MMI.addFrameInst(
> - MCCFIInstruction::createDefCfa(nullptr, Reg, -NumInitialBytes));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> -
> - // Otherwise we need to set the frame pointer and/or add a second stack
> - // adjustment.
> -
> - bool FPNeedsSetting = hasFP(MF);
> - for (; MBBI != MBB.end(); ++MBBI) {
> - // Note that this search makes strong assumptions about the operation used
> - // to store the frame-pointer: it must be "STP x29, x30, ...". This could
> - // change in future, but until then there's no point in implementing
> - // more generic, untestable cases.
> - if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
> - && MBBI->getOperand(0).getReg() == AArch64::X29) {
> - int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
> - FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
> -
> - ++MBBI;
> - emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
> - AArch64::X29,
> - NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
> - MachineInstr::FrameSetup);
> -
> - // The offset adjustment used when emitting debugging locations relative
> - // to whatever frame base is set. AArch64 uses the default frame base (FP
> - // or SP) and this adjusts the calculations to be correct.
> - MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
> - - MFI->getStackSize());
> -
> - if (NeedsFrameMoves) {
> - unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true);
> - unsigned Offset = MFI->getObjectOffset(X29FrameIdx);
> - unsigned CFIIndex = MMI.addFrameInst(
> - MCCFIInstruction::createDefCfa(nullptr, Reg, Offset));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> -
> - FPNeedsSetting = false;
> - }
> -
> - if (!MBBI->getFlag(MachineInstr::FrameSetup))
> - break;
> - }
> -
> - assert(!FPNeedsSetting && "Frame pointer couldn't be set");
> -
> - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
> - MachineInstr::FrameSetup);
> -
> - // Now we emit the rest of the frame setup information, if necessary: we've
> - // already noted the FP and initial SP moves so we're left with the prologue's
> - // final SP update and callee-saved register locations.
> - if (!NeedsFrameMoves)
> - return;
> -
> - // The rest of the stack adjustment
> - if (!hasFP(MF) && NumResidualBytes) {
> - MachineLocation Dst(MachineLocation::VirtualFP);
> - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
> - unsigned Offset = NumResidualBytes + NumInitialBytes;
> - unsigned CFIIndex =
> - MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> -
> - // And any callee-saved registers (it's fine to leave them to the end here,
> - // because the old values are still valid at this point).
> - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
> - if (CSI.size()) {
> - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
> - E = CSI.end(); I != E; ++I) {
> - unsigned Offset = MFI->getObjectOffset(I->getFrameIdx());
> - unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true);
> - unsigned CFIIndex = MMI.addFrameInst(
> - MCCFIInstruction::createOffset(nullptr, Reg, Offset));
> - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
> - .addCFIIndex(CFIIndex);
> - }
> - }
> -}
> -
> -void
> -AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
> - MachineBasicBlock &MBB) const {
> - AArch64MachineFunctionInfo *FuncInfo =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> -
> - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
> - DebugLoc DL = MBBI->getDebugLoc();
> - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - unsigned RetOpcode = MBBI->getOpcode();
> -
> - // Initial and residual are named for consistency with the prologue. Note that
> - // in the epilogue, the residual adjustment is executed first.
> - uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
> - uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
> - uint64_t ArgumentPopSize = 0;
> - if (RetOpcode == AArch64::TC_RETURNdi ||
> - RetOpcode == AArch64::TC_RETURNxi) {
> - MachineOperand &JumpTarget = MBBI->getOperand(0);
> - MachineOperand &StackAdjust = MBBI->getOperand(1);
> -
> - MachineInstrBuilder MIB;
> - if (RetOpcode == AArch64::TC_RETURNdi) {
> - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
> - if (JumpTarget.isGlobal()) {
> - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
> - JumpTarget.getTargetFlags());
> - } else {
> - assert(JumpTarget.isSymbol() && "unexpected tail call destination");
> - MIB.addExternalSymbol(JumpTarget.getSymbolName(),
> - JumpTarget.getTargetFlags());
> - }
> - } else {
> - assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
> - && "Unexpected tail call");
> -
> - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
> - MIB.addReg(JumpTarget.getReg(), RegState::Kill);
> - }
> -
> - // Add the extra operands onto the new tail call instruction even though
> - // they're not used directly (so that liveness is tracked properly etc).
> - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
> - MIB->addOperand(MBBI->getOperand(i));
> -
> -
> - // Delete the pseudo instruction TC_RETURN.
> - MachineInstr *NewMI = std::prev(MBBI);
> - MBB.erase(MBBI);
> - MBBI = NewMI;
> -
> - // For a tail-call in a callee-pops-arguments environment, some or all of
> - // the stack may actually be in use for the call's arguments; this is
> - // calculated during LowerCall and consumed here...
> - ArgumentPopSize = StackAdjust.getImm();
> - } else {
> - // ... otherwise the amount to pop is *all* of the argument space,
> - // conveniently stored in the MachineFunctionInfo by
> - // LowerFormalArguments. This will, of course, be zero for the C calling
> - // convention.
> - ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
> - }
> -
> - assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
> - && "refusing to adjust stack by misaligned amt");
> -
> - // We may need to address callee-saved registers differently, so find out the
> - // bound on the frame indices.
> - const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
> - int MinCSFI = 0;
> - int MaxCSFI = -1;
> -
> - if (CSI.size()) {
> - MinCSFI = CSI[0].getFrameIdx();
> - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
> - }
> -
> - // The "residual" stack update comes first from this direction and guarantees
> - // that SP is NumInitialBytes below its value on function entry, either by a
> - // direct update or restoring it from the frame pointer.
> - if (NumInitialBytes + ArgumentPopSize != 0) {
> - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
> - NumInitialBytes + ArgumentPopSize);
> - --MBBI;
> - }
> -
> -
> - // MBBI now points to the instruction just past the last callee-saved
> - // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
> - // otherwise).
> -
> - // Now we need to find out where to put the bulk of the stack adjustment.
> - MachineBasicBlock::iterator FirstEpilogue = MBBI;
> - while (MBBI != MBB.begin()) {
> - --MBBI;
> -
> - unsigned FrameOp;
> - for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
> - if (MBBI->getOperand(FrameOp).isFI())
> - break;
> - }
> -
> - // If this instruction doesn't have a frame index we've reached the end of
> - // the callee-save restoration.
> - if (FrameOp == MBBI->getNumOperands())
> - break;
> -
> - // Likewise if it *is* a local reference, but not to a callee-saved object.
> - int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
> - if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
> - break;
> -
> - FirstEpilogue = MBBI;
> - }
> -
> - if (MF.getFrameInfo()->hasVarSizedObjects()) {
> - int64_t StaticFrameBase;
> - StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
> - emitRegUpdate(MBB, FirstEpilogue, DL, TII,
> - AArch64::XSP, AArch64::X29, AArch64::NoRegister,
> - StaticFrameBase);
> - } else {
> - emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
> - }
> -}
> -
> -int64_t
> -AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
> - int FrameIndex,
> - unsigned &FrameReg,
> - int SPAdj,
> - bool IsCalleeSaveOp) const {
> - AArch64MachineFunctionInfo *FuncInfo =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> -
> - int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
> -
> - assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
> - && "callee-saved register in unexpected place");
> -
> - // If the frame for this function is particularly large, we adjust the stack
> - // in two phases which means the callee-save related operations see a
> - // different (intermediate) stack size.
> - int64_t FrameRegPos;
> - if (IsCalleeSaveOp) {
> - FrameReg = AArch64::XSP;
> - FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
> - } else if (useFPForAddressing(MF)) {
> - // Have to use the frame pointer since we have no idea where SP is.
> - FrameReg = AArch64::X29;
> - FrameRegPos = FuncInfo->getFramePointerOffset();
> - } else {
> - FrameReg = AArch64::XSP;
> - FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
> - }
> -
> - return TopOfFrameOffset - FrameRegPos;
> -}
> -
> -void
> -AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
> - RegScavenger *RS) const {
> - const AArch64RegisterInfo *RegInfo =
> - static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - const AArch64InstrInfo &TII =
> - *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
> -
> - if (hasFP(MF)) {
> - MF.getRegInfo().setPhysRegUsed(AArch64::X29);
> - MF.getRegInfo().setPhysRegUsed(AArch64::X30);
> - }
> -
> - // If addressing of local variables is going to be more complicated than
> - // shoving a base register and an offset into the instruction then we may well
> - // need to scavenge registers. We should either specifically add a
> - // callee-saved register for this purpose or allocate an extra spill slot.
> - bool BigStack =
> - MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)
> - || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
> - || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
> -
> - if (!BigStack)
> - return;
> -
> - // We certainly need some slack space for the scavenger, preferably an extra
> - // register.
> - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
> - MCPhysReg ExtraReg = AArch64::NoRegister;
> -
> - for (unsigned i = 0; CSRegs[i]; ++i) {
> - if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
> - !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
> - ExtraReg = CSRegs[i];
> - break;
> - }
> - }
> -
> - if (ExtraReg != 0) {
> - MF.getRegInfo().setPhysRegUsed(ExtraReg);
> - } else {
> - assert(RS && "Expect register scavenger to be available");
> -
> - // Create a stack slot for scavenging purposes. PrologEpilogInserter
> - // helpfully places it near either SP or FP for us to avoid
> - // infinite regression during scavenging.
> - const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
> - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
> - RC->getAlignment(),
> - false));
> - }
> -}
> -
> -bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
> - unsigned Reg) const {
> - // If @llvm.returnaddress is called then it will refer to X30 by some means;
> - // the prologue store does not kill the register.
> - if (Reg == AArch64::X30) {
> - if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
> - && MBB.getParent()->getRegInfo().isLiveIn(Reg))
> - return false;
> - }
> -
> - // In all other cases, physical registers are dead after they've been saved
> - // but live at the beginning of the prologue block.
> - MBB.addLiveIn(Reg);
> - return true;
> -}
> -
> -void
> -AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI,
> - const LoadStoreMethod PossClasses[],
> - unsigned NumClasses) const {
> - DebugLoc DL = MBB.findDebugLoc(MBBI);
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
> -
> - // A certain amount of implicit contract is present here. The actual stack
> - // offsets haven't been allocated officially yet, so for strictly correct code
> - // we rely on the fact that the elements of CSI are allocated in order
> - // starting at SP, purely as dictated by size and alignment. In practice since
> - // this function handles the only accesses to those slots it's not quite so
> - // important.
> - //
> - // We have also ordered the Callee-saved register list in AArch64CallingConv
> - // so that the above scheme puts registers in order: in particular we want
> - // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
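> - //
> - // Sketch (assuming X29, X30 and X21 are being saved): the scheme gives
> - //   &X29 == base, &X30 == base + 8, &X21 == base + 16
> - // so a single STP of {X29, X30} lays down the frame record directly.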
> - for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
> - unsigned Reg = CSI[i].getReg();
> -
> - // First we need to find out which register class the register belongs to so
> - // that we can use the correct load/store instructions.
> - unsigned ClassIdx;
> - for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
> - if (PossClasses[ClassIdx].RegClass->contains(Reg))
> - break;
> - }
> - assert(ClassIdx != NumClasses
> - && "Asked to store register in unexpected class");
> - const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
> -
> - // Now we need to decide whether it's possible to emit a paired instruction:
> - // for this we want the next register to be in the same class.
> - MachineInstrBuilder NewMI;
> - bool Pair = false;
> - if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
> - Pair = true;
> - unsigned StLow = 0, StHigh = 0;
> - if (isPrologue) {
> - // Most of these registers will be live-in to the MBB and killed by our
> - // store, though there are exceptions (see determinePrologueDeath).
> - StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
> - StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
> - } else {
> - StLow = RegState::Define;
> - StHigh = RegState::Define;
> - }
> -
> - NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
> - .addReg(CSI[i+1].getReg(), StLow)
> - .addReg(CSI[i].getReg(), StHigh);
> -
> - // If it's a paired op, we've consumed two registers
> - ++i;
> - } else {
> - unsigned State;
> - if (isPrologue) {
> - State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
> - } else {
> - State = RegState::Define;
> - }
> -
> - NewMI = BuildMI(MBB, MBBI, DL,
> - TII.get(PossClasses[ClassIdx].SingleOpcode))
> - .addReg(CSI[i].getReg(), State);
> - }
> -
> - // Note that the FrameIdx refers to the second register in a pair: it will
> - // be allocated the smaller numeric address and so is the one an LDP/STP
> - // address must use.
> - int FrameIdx = CSI[i].getFrameIdx();
> - MachineMemOperand::MemOperandFlags Flags;
> - Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
> - MachineMemOperand *MMO =
> - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
> - Flags,
> - Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
> - MFI.getObjectAlignment(FrameIdx));
> -
> - NewMI.addFrameIndex(FrameIdx)
> - .addImm(0) // address-register offset
> - .addMemOperand(MMO);
> -
> - if (isPrologue)
> - NewMI.setMIFlags(MachineInstr::FrameSetup);
> -
> - // For aesthetic reasons, during an epilogue we want to emit complementary
> - // operations to the prologue, but in the opposite order. So we still
> - // iterate through the CalleeSavedInfo list in order, but we put the
> - // instructions successively earlier in the MBB.
> - if (!isPrologue)
> - --MBBI;
> - }
> -}
> -
> -bool
> -AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const {
> - if (CSI.empty())
> - return false;
> -
> - static const LoadStoreMethod PossibleClasses[] = {
> - {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
> - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
> - };
> - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
> -
> - emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
> - PossibleClasses, NumClasses);
> -
> - return true;
> -}
> -
> -bool
> -AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const {
> -
> - if (CSI.empty())
> - return false;
> -
> - static const LoadStoreMethod PossibleClasses[] = {
> - {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
> - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
> - };
> - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
> -
> - emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
> - PossibleClasses, NumClasses);
> -
> - return true;
> -}
> -
> -bool
> -AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
> - const MachineFrameInfo *MFI = MF.getFrameInfo();
> - const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
> -
> - // This is a question of ABI compliance. The AArch64 PCS gives various
> - // options for conformance, and even at the most stringent level more or
> - // less permits elimination for leaf functions because there's no loss of
> - // functionality (for debugging etc.).
> - if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
> - return true;
> -
> - // The following are hard-limits: incorrect code will be generated if we try
> - // to omit the frame.
> - return (RI->needsStackRealignment(MF) ||
> - MFI->hasVarSizedObjects() ||
> - MFI->isFrameAddressTaken());
> -}
> -
> -bool
> -AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
> - return MF.getFrameInfo()->hasVarSizedObjects();
> -}
> -
> -bool
> -AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
> - const MachineFrameInfo *MFI = MF.getFrameInfo();
> -
> - // Of the various reasons for having a frame pointer, it's actually only
> - // variable-sized objects that prevent reservation of a call frame.
> - return !(hasFP(MF) && MFI->hasVarSizedObjects());
> -}
> -
> -void
> -AArch64FrameLowering::eliminateCallFramePseudoInstr(
> - MachineFunction &MF,
> - MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI) const {
> - const AArch64InstrInfo &TII =
> - *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
> - DebugLoc dl = MI->getDebugLoc();
> - int Opcode = MI->getOpcode();
> - bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
> - uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
> -
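> - // In outline: without a reserved call frame, each call-frame pseudo below
> - // becomes an explicit SP adjustment; with one, only bytes popped by the
> - // callee itself still need re-allocating.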
> - if (!hasReservedCallFrame(MF)) {
> - unsigned Align = getStackAlignment();
> -
> - int64_t Amount = MI->getOperand(0).getImm();
> - Amount = RoundUpToAlignment(Amount, Align);
> - if (!IsDestroy) Amount = -Amount;
> -
> - // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
> - // doesn't have to pop anything), then the first operand will be zero too so
> - // this adjustment is a no-op.
> - if (CalleePopAmount == 0) {
> - // FIXME: in-function stack adjustment for calls is limited to 12-bits
> - // because there's no guaranteed temporary register available. Mostly call
> - // frames will be allocated at the start of a function so this is OK, but
> - // it is a limitation that needs dealing with.
> - assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
> - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
> - }
> - } else if (CalleePopAmount != 0) {
> - // If the calling convention demands that the callee pops arguments from the
> - // stack, we want to add it back if we have a reserved call frame.
> - assert(CalleePopAmount < 0xfff && "call frame too large");
> - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
> - }
> -
> - MBB.erase(MI);
> -}
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (removed)
> @@ -1,108 +0,0 @@
> -//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This class implements the AArch64-specific parts of the TargetFrameLowering
> -// class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_AARCH64_FRAMEINFO_H
> -#define LLVM_AARCH64_FRAMEINFO_H
> -
> -#include "AArch64Subtarget.h"
> -#include "llvm/Target/TargetFrameLowering.h"
> -
> -namespace llvm {
> -class AArch64Subtarget;
> -
> -class AArch64FrameLowering : public TargetFrameLowering {
> -private:
> - // In order to unify the spilling and restoring of callee-saved registers into
> - // emitFrameMemOps, we need to be able to specify which instructions to use
> - // for the relevant memory operations on each register class. An array of the
> - // following struct is populated and passed in to achieve this.
> - struct LoadStoreMethod {
> - const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
> -
> - // The preferred instruction.
> - unsigned PairOpcode; // E.g. LSPair64_STR
> -
> - // Sometimes only a single register can be handled at once.
> - unsigned SingleOpcode; // E.g. LS64_STR
> - };
> -protected:
> - const AArch64Subtarget &STI;
> -
> -public:
> - explicit AArch64FrameLowering(const AArch64Subtarget &sti)
> - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
> - STI(sti) {
> - }
> -
> - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
> - /// the function.
> - void emitPrologue(MachineFunction &MF) const override;
> - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
> -
> - /// Decides how much stack adjustment to perform in each phase of the prologue
> - /// and epilogue.
> - void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
> - uint64_t &Residual) const;
> -
> - int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
> - unsigned &FrameReg, int SPAdj,
> - bool IsCalleeSaveOp) const;
> -
> - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
> - RegScavenger *RS) const override;
> -
> - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const override;
> - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI) const override;
> -
> - void
> - eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI) const override;
> -
> - /// If the register is X30 (i.e. LR) and the return address is used in the
> - /// function then the callee-save store doesn't actually kill the register,
> - /// otherwise it does.
> - bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
> -
> - /// This function emits the loads or stores required during prologue and
> - /// epilogue as efficiently as possible.
> - ///
> - /// The operations involved in setting up and tearing down the frame are
> - /// similar enough to warrant a shared function, particularly as discrepancies
> - /// between the two would be disastrous.
> - void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - const std::vector<CalleeSavedInfo> &CSI,
> - const TargetRegisterInfo *TRI,
> - const LoadStoreMethod PossibleClasses[],
> - unsigned NumClasses) const;
> -
> -
> - bool hasFP(const MachineFunction &MF) const override;
> -
> - bool useFPForAddressing(const MachineFunction &MF) const;
> -
> - /// On AArch64, only the presence of variable-sized stack objects prevents
> - /// reserving a call frame.
> - bool hasReservedCallFrame(const MachineFunction &MF) const override;
> -
> -};
> -
> -} // End llvm namespace
> -
> -#endif
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (removed)
> @@ -1,1576 +0,0 @@
> -//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines an instruction selector for the AArch64 target.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64InstrInfo.h"
> -#include "AArch64Subtarget.h"
> -#include "AArch64TargetMachine.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/ADT/APSInt.h"
> -#include "llvm/CodeGen/SelectionDAGISel.h"
> -#include "llvm/IR/GlobalValue.h"
> -#include "llvm/Support/Debug.h"
> -#include "llvm/Support/raw_ostream.h"
> -
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "aarch64-isel"
> -
> -//===--------------------------------------------------------------------===//
> -/// AArch64 specific code to select AArch64 machine instructions for
> -/// SelectionDAG operations.
> -///
> -namespace {
> -
> -class AArch64DAGToDAGISel : public SelectionDAGISel {
> - AArch64TargetMachine &TM;
> -
> - /// Keep a pointer to the AArch64Subtarget around so that we can
> - /// make the right decision when generating code for different targets.
> - const AArch64Subtarget *Subtarget;
> -
> -public:
> - explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
> - CodeGenOpt::Level OptLevel)
> - : SelectionDAGISel(tm, OptLevel), TM(tm),
> - Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
> - }
> -
> - const char *getPassName() const override {
> - return "AArch64 Instruction Selection";
> - }
> -
> - // Include the pieces autogenerated from the target description.
> -#include "AArch64GenDAGISel.inc"
> -
> - template<unsigned MemSize>
> - bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
> - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
> - if (!CN || CN->getZExtValue() % MemSize != 0
> - || CN->getZExtValue() / MemSize > 0xfff)
> - return false;
> -
> - UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
> - return true;
> - }
> -
> - template<unsigned RegWidth>
> - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
> - return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
> - }
> -
> - /// Used for pre-lowered address-reference nodes, so we already know
> - /// the fields match. This operand's job is simply to add an
> - /// appropriate shift operand to the MOVZ/MOVK instruction.
> - template<unsigned LogShift>
> - bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
> - Imm = N;
> - Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
> - return true;
> - }
> -
> - bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
> -
> - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
> - unsigned RegWidth);
> -
> - bool SelectInlineAsmMemoryOperand(const SDValue &Op,
> - char ConstraintCode,
> - std::vector<SDValue> &OutOps) override;
> -
> - bool SelectLogicalImm(SDValue N, SDValue &Imm);
> -
> - template<unsigned RegWidth>
> - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
> - return SelectTSTBOperand(N, FixedPos, RegWidth);
> - }
> -
> - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
> -
> - SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
> - unsigned Op64);
> -
> - /// Put the given constant into a pool and return a DAG which will give its
> - /// address.
> - SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
> -
> - SDNode *TrySelectToMoveImm(SDNode *N);
> - SDNode *LowerToFPLitPool(SDNode *Node);
> - SDNode *SelectToLitPool(SDNode *N);
> -
> - SDNode* Select(SDNode*) override;
> -private:
> - /// Get the opcode for a table-lookup instruction.
> - unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
> -
> - /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
> - /// IsExt indicates whether the result will be extended with an argument.
> - SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
> -
> - /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
> - SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcode);
> -
> - /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
> - SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcodes);
> -
> - /// Form sequences of consecutive 64/128-bit registers for use in NEON
> - /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
> - /// between 1 and 4 elements. If it contains a single element, that element
> - /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
> - SDValue createDTuple(ArrayRef<SDValue> Vecs);
> - SDValue createQTuple(ArrayRef<SDValue> Vecs);
> -
> - /// Generic helper for the createDTuple/createQTuple
> - /// functions. Those should almost always be called instead.
> - SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
> - unsigned SubRegs[]);
> -
> - /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
> - /// The opcode array specifies the instructions used for load.
> - SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcodes);
> -
> - /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
> - /// The opcode arrays specify the instructions used for load/store.
> - SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
> - unsigned NumVecs, const uint16_t *Opcodes);
> -
> - SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
> - SDValue Operand);
> -};
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
> - unsigned RegWidth) {
> - const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
> - if (!CN) return false;
> -
> - // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
> - // is between 1 and 32 for a destination w-register, or 1 and 64 for an
> - // x-register.
> - //
> - // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
> - // want THIS_NODE to be 2^fbits. This is much easier to deal with using
> - // integers.
> - bool IsExact;
> -
> - // fbits is between 1 and 64 in the worst-case, which means the fmul
> - // could have 2^64 as an actual operand. Need 65 bits of precision.
> - APSInt IntVal(65, true);
> - CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
> -
> - // N.b. isPowerOf2 also checks for > 0.
> - if (!IsExact || !IntVal.isPowerOf2()) return false;
> - unsigned FBits = IntVal.logBase2();
> -
> - // Checks above should have guaranteed that we haven't lost information in
> - // finding FBits, but it must still be in range.
> - if (FBits == 0 || FBits > RegWidth) return false;
> -
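> - // Worked example: (fp_to_sint (fmul x, 16.0)) reaches this point with
> - // IntVal == 16, so FBits == 4 and the pattern selects an FCVTZS with 4
> - // fractional bits (the constant created below encodes 64 - FBits).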
> - FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
> - return true;
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
> - char ConstraintCode,
> - std::vector<SDValue> &OutOps) {
> - switch (ConstraintCode) {
> - default: llvm_unreachable("Unrecognised AArch64 memory constraint");
> - case 'm':
> - // FIXME: more freedom is actually permitted for 'm'. We can go
> - // hunting for a base and an offset if we want. Of course, since
> - // we don't really know how the operand is going to be used we're
> - // probably restricted to the load/store pair's simm7 as an offset
> - // range anyway.
> - case 'Q':
> - OutOps.push_back(Op);
> - }
> -
> - return false;
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
> - ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
> - if (!Imm || !Imm->getValueAPF().isPosZero())
> - return false;
> -
> - // Doesn't actually carry any information, but keeps TableGen quiet.
> - Dummy = CurDAG->getTargetConstant(0, MVT::i32);
> - return true;
> -}
> -
> -bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
> - uint32_t Bits;
> - uint32_t RegWidth = N.getValueType().getSizeInBits();
> -
> - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
> - if (!CN) return false;
> -
> - if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
> - return false;
> -
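> - // A64 logical immediates are a rotated run of ones replicated across the
> - // register: e.g. 0x00ff00ff00ff00ff is encodable, while most arbitrary
> - // 64-bit constants are not.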
> - Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
> - return true;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
> - SDNode *ResNode;
> - SDLoc dl(Node);
> - EVT DestType = Node->getValueType(0);
> - unsigned DestWidth = DestType.getSizeInBits();
> -
> - unsigned MOVOpcode;
> - EVT MOVType;
> - int UImm16, Shift;
> - uint32_t LogicalBits;
> -
> - uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
> - if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
> - MOVType = DestType;
> - MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
> - } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
> - MOVType = DestType;
> - MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
> - } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
> - // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
> - // use a 32-bit instruction: "movn w0, 0xedcb".
> - MOVType = MVT::i32;
> - MOVOpcode = AArch64::MOVNwii;
> - } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
> - MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
> - uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
> -
> - return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
> - CurDAG->getRegister(ZR, DestType),
> - CurDAG->getTargetConstant(LogicalBits, MVT::i32));
> - } else {
> - // Can't handle it in one instruction. There's scope for permitting two (or
> - // more) instructions, but that'll need more thought.
> - return nullptr;
> - }
> -
> - ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
> - CurDAG->getTargetConstant(UImm16, MVT::i32),
> - CurDAG->getTargetConstant(Shift, MVT::i32));
> -
> - if (MOVType != DestType) {
> - ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
> - MVT::i64, MVT::i32, MVT::Other,
> - CurDAG->getTargetConstant(0, MVT::i64),
> - SDValue(ResNode, 0),
> - CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
> - }
> -
> - return ResNode;
> -}
> -
> -SDValue
> -AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
> - const Constant *CV) {
> - EVT PtrVT = getTargetLowering()->getPointerTy();
> -
> - switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
> - case CodeModel::Small: {
> - unsigned Alignment =
> - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
> - return CurDAG->getNode(
> - AArch64ISD::WrapperSmall, DL, PtrVT,
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
> - CurDAG->getConstant(Alignment, MVT::i32));
> - }
> - case CodeModel::Large: {
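> - // The nodes below build the standard large-model materialisation, 16 bits
> - // at a time. Roughly, in assembly (illustrative pool label assumed):
> - //   movz x0, #:abs_g3:.LCPI0_0    // bits [48,64)
> - //   movk x0, #:abs_g2_nc:.LCPI0_0 // bits [32,48)
> - //   movk x0, #:abs_g1_nc:.LCPI0_0 // bits [16,32)
> - //   movk x0, #:abs_g0_nc:.LCPI0_0 // bits [0,16)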
> - SDNode *LitAddr;
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVZxii, DL, PtrVT,
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
> - CurDAG->getTargetConstant(3, MVT::i32));
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
> - CurDAG->getTargetConstant(2, MVT::i32));
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
> - CurDAG->getTargetConstant(1, MVT::i32));
> - LitAddr = CurDAG->getMachineNode(
> - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
> - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
> - CurDAG->getTargetConstant(0, MVT::i32));
> - return SDValue(LitAddr, 0);
> - }
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
> - SDLoc DL(Node);
> - uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
> - int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
> - EVT DestType = Node->getValueType(0);
> -
> - // Since we may end up loading a 64-bit constant from a 32-bit entry, the
> - // constant in the pool may have a different type to the eventual node.
> - ISD::LoadExtType Extension;
> - EVT MemType;
> -
> - assert((DestType == MVT::i64 || DestType == MVT::i32)
> - && "Only expect integer constants at the moment");
> -
> - if (DestType == MVT::i32) {
> - Extension = ISD::NON_EXTLOAD;
> - MemType = MVT::i32;
> - } else if (UnsignedVal <= UINT32_MAX) {
> - Extension = ISD::ZEXTLOAD;
> - MemType = MVT::i32;
> - } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
> - Extension = ISD::SEXTLOAD;
> - MemType = MVT::i32;
> - } else {
> - Extension = ISD::NON_EXTLOAD;
> - MemType = MVT::i64;
> - }
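> - // E.g. 0xffffffffffffff00 has SignedVal == -256, so it is stored as the
> - // 32-bit pool entry 0xffffff00 and sign-extended when loaded, halving the
> - // constant-pool footprint.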
> -
> - Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
> - MemType.getSizeInBits()),
> - UnsignedVal);
> - SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
> - unsigned Alignment =
> - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
> -
> - return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
> - PoolAddr,
> - MachinePointerInfo::getConstantPool(), MemType,
> - /* isVolatile = */ false,
> - /* isNonTemporal = */ false,
> - Alignment).getNode();
> -}
> -
> -SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
> - SDLoc DL(Node);
> - const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
> - EVT DestType = Node->getValueType(0);
> -
> - unsigned Alignment =
> - getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
> - SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
> -
> - return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
> - MachinePointerInfo::getConstantPool(),
> - /* isVolatile = */ false,
> - /* isNonTemporal = */ false,
> - /* isInvariant = */ true,
> - Alignment).getNode();
> -}
> -
> -bool
> -AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
> - unsigned RegWidth) {
> - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
> - if (!CN) return false;
> -
> - uint64_t Val = CN->getZExtValue();
> -
> - if (!isPowerOf2_64(Val)) return false;
> -
> - unsigned TestedBit = Log2_64(Val);
> - // Checks above should have guaranteed that we haven't lost information in
> - // finding TestedBit, but it must still be in range.
> - if (TestedBit >= RegWidth) return false;
> -
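> - // Example: a mask of 8 tests bit 3, so FixedPos becomes the constant 3
> - // (as used by the TBZ/TBNZ patterns); a mask such as 12 was rejected
> - // above as not a power of two.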
> - FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
> - return true;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
> - unsigned Op16,unsigned Op32,
> - unsigned Op64) {
> - // Mostly direct translation to the given operations, except that we preserve
> - // the AtomicOrdering for use later on.
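> - // (Keeping the ordering means the later expansion of these pseudos into
> - // load/store-exclusive loops can use acquire/release variants only where
> - // the ordering actually demands them.)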
> - AtomicSDNode *AN = cast<AtomicSDNode>(Node);
> - EVT VT = AN->getMemoryVT();
> -
> - unsigned Op;
> - if (VT == MVT::i8)
> - Op = Op8;
> - else if (VT == MVT::i16)
> - Op = Op16;
> - else if (VT == MVT::i32)
> - Op = Op32;
> - else if (VT == MVT::i64)
> - Op = Op64;
> - else
> - llvm_unreachable("Unexpected atomic operation");
> -
> - SmallVector<SDValue, 4> Ops;
> - for (unsigned i = 1; i < AN->getNumOperands(); ++i)
> - Ops.push_back(AN->getOperand(i));
> -
> - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
> - Ops.push_back(AN->getOperand(0)); // Chain moves to the end
> -
> - return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, Ops);
> -}
> -
> -SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
> - static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
> - AArch64::DTripleRegClassID,
> - AArch64::DQuadRegClassID };
> - static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
> - AArch64::dsub_2, AArch64::dsub_3 };
> -
> - return createTuple(Regs, RegClassIDs, SubRegs);
> -}
> -
> -SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
> - static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
> - AArch64::QTripleRegClassID,
> - AArch64::QQuadRegClassID };
> - static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
> - AArch64::qsub_2, AArch64::qsub_3 };
> -
> - return createTuple(Regs, RegClassIDs, SubRegs);
> -}
> -
> -SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
> - unsigned RegClassIDs[],
> - unsigned SubRegs[]) {
> - // There's no special register-class for a vector-list of 1 element: it's just
> - // a vector.
> - if (Regs.size() == 1)
> - return Regs[0];
> -
> - assert(Regs.size() >= 2 && Regs.size() <= 4);
> -
> - SDLoc DL(Regs[0].getNode());
> -
> - SmallVector<SDValue, 4> Ops;
> -
> - // First operand of REG_SEQUENCE is the desired RegClass.
> - Ops.push_back(
> - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
> -
> - // Then we get pairs of source & subregister-position for the components.
> - for (unsigned i = 0; i < Regs.size(); ++i) {
> - Ops.push_back(Regs[i]);
> - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
> - }
> -
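> - // For a triple of D registers, for example, the node built below is:
> - //   REG_SEQUENCE DTripleRegClassID, Regs[0], dsub_0, Regs[1], dsub_1,
> - //                Regs[2], dsub_2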
> - SDNode *N =
> - CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
> - return SDValue(N, 0);
> -}
> -
> -
> -// Get the register-stride update opcode of a VLD/VST instruction that is
> -// otherwise equivalent to the given fixed-stride updating instruction.
> -static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
> - switch (Opc) {
> - default: break;
> - case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
> - case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
> - case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
> - case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
> - case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
> - case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
> - case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
> - case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
> -
> - case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
> - case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
> - case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
> - case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
> - case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
> - case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
> - case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
> -
> - case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
> - case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
> - case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
> - case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
> - case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
> - case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
> - case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
> -
> - case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
> - case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
> - case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
> - case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
> - case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
> - case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
> - case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
> -
> - case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
> - case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
> - case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
> - case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
> - case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
> - case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
> - case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
> - case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;
> -
> - case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
> - case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
> - case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
> - case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
> - case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
> - case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
> - case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
> - case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;
> -
> - case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
> - case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
> - case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
> - case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
> - case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
> - case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
> - case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
> - case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;
> -
> - case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
> - case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
> - case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
> - case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
> - case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
> - case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
> - case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
> - case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
> -
> - case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
> - case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
> - case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
> - case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
> - case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
> - case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
> - case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
> -
> - case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
> - case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
> - case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
> - case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
> - case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
> - case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
> - case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
> -
> - case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
> - case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
> - case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
> - case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
> - case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
> - case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
> - case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
> -
> - case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
> - case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
> - case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
> - case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
> - case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
> - case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
> - case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
> - case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;
> -
> - case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
> - case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
> - case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
> - case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
> - case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
> - case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
> - case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
> - case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;
> -
> - case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
> - case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
> - case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
> - case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
> - case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
> - case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
> - case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
> - case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
> -
> - // Post-index of duplicate loads
> - case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
> - case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
> - case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
> - case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
> - case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
> - case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
> - case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
> - case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
> -
> - case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
> - case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
> - case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
> - case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
> - case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
> - case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
> - case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
> - case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
> -
> - case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
> - case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
> - case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
> - case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
> - case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
> - case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
> - case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
> - case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
> -
> - // Post-index of lane loads
> - case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
> - case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
> - case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
> - case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
> -
> - case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
> - case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
> - case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
> - case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
> -
> - case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
> - case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
> - case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
> - case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
> -
> - // Post-index of lane stores
> - case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
> - case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
> - case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
> - case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
> -
> - case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
> - case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
> - case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
> - case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
> -
> - case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
> - case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
> - case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
> - case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
> - }
> - return Opc; // If not one we handle, return it unchanged.
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
> - unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
> -
> - EVT VT = N->getValueType(0);
> - unsigned OpcodeIndex;
> - bool is64BitVector = VT.is64BitVector();
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
> - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
> - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
> - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
> - default: llvm_unreachable("unhandled vector load type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SmallVector<SDValue, 2> Ops;
> - unsigned AddrOpIdx = isUpdating ? 1 : 2;
> - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
> -
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(AddrOpIdx + 1);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> -
> - Ops.push_back(N->getOperand(0)); // Push back the Chain
> -
> - SmallVector<EVT, 3> ResTys;
> - // Push back the type of return super register
> - if (NumVecs == 1)
> - ResTys.push_back(VT);
> - else if (NumVecs == 3)
> - ResTys.push_back(MVT::Untyped);
> - else {
> - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
> - is64BitVector ? NumVecs : NumVecs * 2);
> - ResTys.push_back(ResTy);
> - }
> -
> - if (isUpdating)
> - ResTys.push_back(MVT::i64); // Type of the updated register
> - ResTys.push_back(MVT::Other); // Type of the Chain
> - SDLoc dl(N);
> - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> -
> - // Transfer memoperands.
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> - cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
> -
> - if (NumVecs == 1)
> - return VLd;
> -
> - // If NumVecs > 1, the return result is a super register containing 2-4
> - // consecutive vector registers.
> - SDValue SuperReg = SDValue(VLd, 0);
> -
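> - // E.g. an LD2 of two v4i32 vectors comes back as a single v4i64 "super"
> - // value; qsub_0 and qsub_1 are then extracted as the two v4i32 results.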
> - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
> - for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
> - ReplaceUses(SDValue(N, Vec),
> - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
> - // Update users of the Chain
> - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
> - if (isUpdating)
> - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
> -
> - return nullptr;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
> - unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
> - SDLoc dl(N);
> -
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> -
> - unsigned AddrOpIdx = isUpdating ? 1 : 2;
> - unsigned Vec0Idx = 3;
> - EVT VT = N->getOperand(Vec0Idx).getValueType();
> - unsigned OpcodeIndex;
> - bool is64BitVector = VT.is64BitVector();
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
> - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
> - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
> - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
> - default: llvm_unreachable("unhandled vector store type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SmallVector<EVT, 2> ResTys;
> - if (isUpdating)
> - ResTys.push_back(MVT::i64);
> - ResTys.push_back(MVT::Other); // Type for the Chain
> -
> - SmallVector<SDValue, 6> Ops;
> - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
> -
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(AddrOpIdx + 1);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> -
> - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
> - N->op_begin() + Vec0Idx + NumVecs);
> - SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
> - Ops.push_back(SrcReg);
> -
> - // Push back the Chain
> - Ops.push_back(N->getOperand(0));
> -
> - // Transfer memoperands.
> - SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> - cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
> -
> - return VSt;
> -}
> -
> -SDValue
> -AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
> - SDValue Operand) {
> - SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
> - VT, VTD, MVT::Other,
> - CurDAG->getTargetConstant(0, MVT::i64),
> - Operand,
> - CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
> - return SDValue(Reg, 0);
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
> - unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
> - SDLoc dl(N);
> -
> - EVT VT = N->getValueType(0);
> - unsigned OpcodeIndex;
> - bool is64BitVector = VT.is64BitVector();
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
> - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
> - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
> - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
> - default: llvm_unreachable("unhandled vector duplicate lane load type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SDValue SuperReg;
> - SmallVector<SDValue, 6> Ops;
> - Ops.push_back(N->getOperand(1)); // Push back the Memory Address
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(2);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> - Ops.push_back(N->getOperand(0)); // Push back the Chain
> -
> - SmallVector<EVT, 3> ResTys;
> - // Push back the type of return super register
> - if (NumVecs == 3)
> - ResTys.push_back(MVT::Untyped);
> - else {
> - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
> - is64BitVector ? NumVecs : NumVecs * 2);
> - ResTys.push_back(ResTy);
> - }
> - if (isUpdating)
> - ResTys.push_back(MVT::i64); // Type of the updated register
> - ResTys.push_back(MVT::Other); // Type of the Chain
> - SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> -
> - // Transfer memoperands.
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> - cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
> -
> - SuperReg = SDValue(VLdDup, 0);
> - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
> - // Update uses of each register in the super register
> - for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
> - ReplaceUses(SDValue(N, Vec),
> - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
> - // Update uses of the Chain
> - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
> - if (isUpdating)
> - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
> - return nullptr;
> -}
> -
> -// Only 128-bit variants of the load/store lane instructions exist, so 64-bit
> -// vectors are also selected to the 128-bit instructions: SUBREG_TO_REG adapts
> -// the input to a 128-bit vector and EXTRACT_SUBREG recovers the 64-bit vector
> -// from the 128-bit output.
> -SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
> - bool isUpdating, unsigned NumVecs,
> - const uint16_t *Opcodes) {
> - assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
> - SDLoc dl(N);
> - unsigned AddrOpIdx = isUpdating ? 1 : 2;
> - unsigned Vec0Idx = 3;
> -
> - SDValue Chain = N->getOperand(0);
> - unsigned Lane =
> - cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
> - EVT VT = N->getOperand(Vec0Idx).getValueType();
> - bool is64BitVector = VT.is64BitVector();
> - EVT VT64; // 64-bit Vector Type
> -
> - if (is64BitVector) {
> - VT64 = VT;
> - VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
> - VT.getVectorNumElements() * 2);
> - }
> -
> - unsigned OpcodeIndex;
> - switch (VT.getScalarType().getSizeInBits()) {
> - case 8: OpcodeIndex = 0; break;
> - case 16: OpcodeIndex = 1; break;
> - case 32: OpcodeIndex = 2; break;
> - case 64: OpcodeIndex = 3; break;
> - default: llvm_unreachable("unhandled vector lane load/store type");
> - }
> - unsigned Opc = Opcodes[OpcodeIndex];
> -
> - SmallVector<EVT, 3> ResTys;
> - if (IsLoad) {
> - // Push back the type of return super register
> - if (NumVecs == 3)
> - ResTys.push_back(MVT::Untyped);
> - else {
> - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
> - is64BitVector ? NumVecs : NumVecs * 2);
> - ResTys.push_back(ResTy);
> - }
> - }
> - if (isUpdating)
> - ResTys.push_back(MVT::i64); // Type of the updated register
> - ResTys.push_back(MVT::Other); // Type of Chain
> - SmallVector<SDValue, 5> Ops;
> - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
> - if (isUpdating) {
> - SDValue Inc = N->getOperand(AddrOpIdx + 1);
> - if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
> - Opc = getVLDSTRegisterUpdateOpcode(Opc);
> - Ops.push_back(Inc);
> - }
> -
> - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
> - N->op_begin() + Vec0Idx + NumVecs);
> - if (is64BitVector)
> - for (unsigned i = 0; i < Regs.size(); i++)
> - Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
> - SDValue SuperReg = createQTuple(Regs);
> -
> - Ops.push_back(SuperReg); // Source Reg
> - SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
> - Ops.push_back(LaneValue);
> - Ops.push_back(Chain); // Push back the Chain
> -
> - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
> - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
> - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
> - cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
> - if (!IsLoad)
> - return VLdLn;
> -
> - // Extract the subregisters.
> - SuperReg = SDValue(VLdLn, 0);
> - unsigned Sub0 = AArch64::qsub_0;
> - // Update uses of each register in the super register
> - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
> - SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
> - if (is64BitVector) {
> - SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
> - }
> - ReplaceUses(SDValue(N, Vec), SUB0);
> - }
> - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
> - if (isUpdating)
> - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
> - return nullptr;
> -}
> -
> -unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
> - unsigned NumOfVec) {
> - assert(NumOfVec >= 1 && NumOfVec <= 4 && "TBL NumVecs out-of-range");
> -
> - unsigned Opc = 0;
> - switch (NumOfVec) {
> - default:
> - break;
> - case 1:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
> - break;
> - case 2:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
> - break;
> - case 3:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
> - break;
> - case 4:
> - if (IsExt)
> - Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
> - else
> - Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
> - break;
> - }
> -
> - return Opc;
> -}
> -
> -SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
> - bool IsExt) {
> - assert(NumVecs >= 1 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
> - SDLoc dl(N);
> -
> - // Check that the elements of the lookup table are 128-bit vectors.
> - unsigned Vec0Idx = IsExt ? 2 : 1;
> - assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
> - "The element of lookup table for vtbl and vtbx must be 128-bit");
> -
> - // Check whether the return value type is 64-bit.
> - EVT ResVT = N->getValueType(0);
> - bool is64BitRes = ResVT.is64BitVector();
> -
> - // Create new SDValue for vector list
> - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
> - N->op_begin() + Vec0Idx + NumVecs);
> - SDValue TblReg = createQTuple(Regs);
> - unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
> -
> - SmallVector<SDValue, 3> Ops;
> - if (IsExt)
> - Ops.push_back(N->getOperand(1));
> - Ops.push_back(TblReg);
> - Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
> - return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
> -}
> -
> -SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
> - // Dump information about the Node being selected
> - DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
> -
> - if (Node->isMachineOpcode()) {
> - DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
> - Node->setNodeId(-1);
> - return nullptr;
> - }
> -
> - switch (Node->getOpcode()) {
> - case ISD::ATOMIC_LOAD_ADD:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_ADD_I8,
> - AArch64::ATOMIC_LOAD_ADD_I16,
> - AArch64::ATOMIC_LOAD_ADD_I32,
> - AArch64::ATOMIC_LOAD_ADD_I64);
> - case ISD::ATOMIC_LOAD_SUB:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_SUB_I8,
> - AArch64::ATOMIC_LOAD_SUB_I16,
> - AArch64::ATOMIC_LOAD_SUB_I32,
> - AArch64::ATOMIC_LOAD_SUB_I64);
> - case ISD::ATOMIC_LOAD_AND:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_AND_I8,
> - AArch64::ATOMIC_LOAD_AND_I16,
> - AArch64::ATOMIC_LOAD_AND_I32,
> - AArch64::ATOMIC_LOAD_AND_I64);
> - case ISD::ATOMIC_LOAD_OR:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_OR_I8,
> - AArch64::ATOMIC_LOAD_OR_I16,
> - AArch64::ATOMIC_LOAD_OR_I32,
> - AArch64::ATOMIC_LOAD_OR_I64);
> - case ISD::ATOMIC_LOAD_XOR:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_XOR_I8,
> - AArch64::ATOMIC_LOAD_XOR_I16,
> - AArch64::ATOMIC_LOAD_XOR_I32,
> - AArch64::ATOMIC_LOAD_XOR_I64);
> - case ISD::ATOMIC_LOAD_NAND:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_NAND_I8,
> - AArch64::ATOMIC_LOAD_NAND_I16,
> - AArch64::ATOMIC_LOAD_NAND_I32,
> - AArch64::ATOMIC_LOAD_NAND_I64);
> - case ISD::ATOMIC_LOAD_MIN:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_MIN_I8,
> - AArch64::ATOMIC_LOAD_MIN_I16,
> - AArch64::ATOMIC_LOAD_MIN_I32,
> - AArch64::ATOMIC_LOAD_MIN_I64);
> - case ISD::ATOMIC_LOAD_MAX:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_MAX_I8,
> - AArch64::ATOMIC_LOAD_MAX_I16,
> - AArch64::ATOMIC_LOAD_MAX_I32,
> - AArch64::ATOMIC_LOAD_MAX_I64);
> - case ISD::ATOMIC_LOAD_UMIN:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_UMIN_I8,
> - AArch64::ATOMIC_LOAD_UMIN_I16,
> - AArch64::ATOMIC_LOAD_UMIN_I32,
> - AArch64::ATOMIC_LOAD_UMIN_I64);
> - case ISD::ATOMIC_LOAD_UMAX:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_LOAD_UMAX_I8,
> - AArch64::ATOMIC_LOAD_UMAX_I16,
> - AArch64::ATOMIC_LOAD_UMAX_I32,
> - AArch64::ATOMIC_LOAD_UMAX_I64);
> - case ISD::ATOMIC_SWAP:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_SWAP_I8,
> - AArch64::ATOMIC_SWAP_I16,
> - AArch64::ATOMIC_SWAP_I32,
> - AArch64::ATOMIC_SWAP_I64);
> - case ISD::ATOMIC_CMP_SWAP:
> - return SelectAtomic(Node,
> - AArch64::ATOMIC_CMP_SWAP_I8,
> - AArch64::ATOMIC_CMP_SWAP_I16,
> - AArch64::ATOMIC_CMP_SWAP_I32,
> - AArch64::ATOMIC_CMP_SWAP_I64);
> - case ISD::FrameIndex: {
> - int FI = cast<FrameIndexSDNode>(Node)->getIndex();
> - EVT PtrTy = getTargetLowering()->getPointerTy();
> - SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
> - return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
> - TFI, CurDAG->getTargetConstant(0, PtrTy));
> - }
> - case ISD::Constant: {
> - SDNode *ResNode = nullptr;
> - if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
> - // XZR and WZR are probably even better than an actual move: most of the
> - // time they can be folded into another instruction with *no* cost.
> -
> - EVT Ty = Node->getValueType(0);
> - assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
> - uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
> - ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
> - SDLoc(Node),
> - Register, Ty).getNode();
> - }
> -
> - // Next best option is a move-immediate, see if we can do that.
> - if (!ResNode) {
> - ResNode = TrySelectToMoveImm(Node);
> - }
> -
> - if (ResNode)
> - return ResNode;
> -
> - // If even that fails, we fall back to a lit-pool entry for now. Future
> - // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
> - ResNode = SelectToLitPool(Node);
> - assert(ResNode && "We need *some* way to materialise a constant");
> -
> - // We want to continue selection at this point since the litpool access
> - // we just generated uses generic nodes for simplicity.
> - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
> - Node = ResNode;
> - break;
> - }
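
The ISD::Constant case above is a three-step materialisation cascade; a
minimal standalone restatement (the enum and names are mine, purely
illustrative):

  // Order of preference when materialising an integer constant, as above:
  // a zero register, then a move-immediate, then a literal-pool load.
  enum class ConstStrategy { ZeroReg, MoveImm, LitPool };
  ConstStrategy pickConstStrategy(unsigned long long Val, bool MoveImmFits) {
    if (Val == 0)
      return ConstStrategy::ZeroReg; // WZR/XZR usually folds for free
    if (MoveImmFits)
      return ConstStrategy::MoveImm; // a single move-immediate instruction
    return ConstStrategy::LitPool;   // load the value from a constant pool
  }
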
> - case ISD::ConstantFP: {
> - if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
> - // FMOV will take care of it from TableGen
> - break;
> - }
> -
> - SDNode *ResNode = LowerToFPLitPool(Node);
> - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
> -
> - // We want to continue selection at this point since the litpool access
> - // we just generated uses generic nodes for simplicity.
> - Node = ResNode;
> - break;
> - }
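
A64Imms::isFPImm itself isn't in this hunk, but for context: FMOV's 8-bit
immediate can only encode +/-(16+m)/16 * 2^e with m in [0,15] and e in
[-3,4]. A self-contained sketch of that check (my reconstruction, not the
removed helper):

  #include <cmath>
  // True if V fits FMOV's (scalar, immediate) encoding; anything else falls
  // through to the literal-pool path above.
  bool isFMOVImmediate(double V) {
    if (V == 0.0 || !std::isfinite(V))
      return false;
    double A = std::fabs(V);
    for (int E = -3; E <= 4; ++E)
      for (int M = 0; M <= 15; ++M)
        if (A == (16.0 + M) / 16.0 * std::ldexp(1.0, E))
          return true;
    return false;
  }
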
> - case AArch64ISD::NEON_LD1_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
> - AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
> - AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
> - AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 1, Opcodes);
> - }
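
All of the 8-entry opcode tables in this switch share one layout; going by
the naming, the implied slot order is as follows (an assumption on my part,
since SelectVLD's body lives elsewhere in the diff):

  // Four 64-bit vector forms first, then their 128-bit counterparts.
  enum VecForm { V8B, V4H, V2S, V1D, V16B, V8H, V4S, V2D };
  unsigned short opcodeFor(const unsigned short Opcodes[8], VecForm Form) {
    return Opcodes[Form]; // e.g. Opcodes[V4S] yields the ..._4S_... opcode
  }
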
> - case AArch64ISD::NEON_LD2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
> - AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
> - AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
> - AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
> - AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
> - AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
> - AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
> - AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
> - AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
> - AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD1x2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed,
> - AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
> - AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
> - AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD1x3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed,
> - AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
> - AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
> - AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD1x4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed,
> - AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
> - AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
> - AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed
> - };
> - return SelectVLD(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
> - AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
> - AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
> - AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
> - };
> - return SelectVST(Node, true, 1, Opcodes);
> - }
> - case AArch64ISD::NEON_ST2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
> - AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
> - AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
> - AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
> - };
> - return SelectVST(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_ST3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
> - AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
> - AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
> - AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
> - };
> - return SelectVST(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_ST4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
> - AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
> - AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
> - AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
> - };
> - return SelectVST(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD2DUP: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
> - AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
> - AArch64::LD2R_4S, AArch64::LD2R_2D
> - };
> - return SelectVLDDup(Node, false, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3DUP: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
> - AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
> - AArch64::LD3R_4S, AArch64::LD3R_2D
> - };
> - return SelectVLDDup(Node, false, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4DUP: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
> - AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
> - AArch64::LD4R_4S, AArch64::LD4R_2D
> - };
> - return SelectVLDDup(Node, false, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD2DUP_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed,
> - AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed,
> - AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
> - AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed
> - };
> - return SelectVLDDup(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3DUP_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed,
> - AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed,
> - AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
> - AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed
> - };
> - return SelectVLDDup(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4DUP_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed,
> - AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed,
> - AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
> - AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed
> - };
> - return SelectVLDDup(Node, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_LD2LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
> - AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, true, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_LD3LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
> - AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, true, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_LD4LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
> - AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, true, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_ST2LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
> - AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, false, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_ST3LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
> - AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, false, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_ST4LN_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
> - AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
> - };
> - return SelectVLDSTLane(Node, false, true, 4, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1x2_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed,
> - AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
> - AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
> - AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
> - };
> - return SelectVST(Node, true, 2, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1x3_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed,
> - AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
> - AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
> - AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
> - };
> - return SelectVST(Node, true, 3, Opcodes);
> - }
> - case AArch64ISD::NEON_ST1x4_UPD: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed,
> - AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
> - AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
> - AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
> - };
> - return SelectVST(Node, true, 4, Opcodes);
> - }
> - case ISD::INTRINSIC_WO_CHAIN: {
> - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
> - bool IsExt = false;
> - switch (IntNo) {
> - default:
> - break;
> - case Intrinsic::aarch64_neon_vtbx1:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl1:
> - return SelectVTBL(Node, 1, IsExt);
> - case Intrinsic::aarch64_neon_vtbx2:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl2:
> - return SelectVTBL(Node, 2, IsExt);
> - case Intrinsic::aarch64_neon_vtbx3:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl3:
> - return SelectVTBL(Node, 3, IsExt);
> - case Intrinsic::aarch64_neon_vtbx4:
> - IsExt = true;
> - case Intrinsic::aarch64_neon_vtbl4:
> - return SelectVTBL(Node, 4, IsExt);
> - }
> - break;
> - }
> - case ISD::INTRINSIC_VOID:
> - case ISD::INTRINSIC_W_CHAIN: {
> - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
> - switch (IntNo) {
> - default:
> - break;
> - case Intrinsic::arm_neon_vld1: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
> - AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
> - };
> - return SelectVLD(Node, false, 1, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
> - AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
> - };
> - return SelectVLD(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
> - AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
> - };
> - return SelectVLD(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
> - AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
> - };
> - return SelectVLD(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vld1x2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
> - AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
> - AArch64::LD1x2_4S, AArch64::LD1x2_2D
> - };
> - return SelectVLD(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vld1x3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
> - AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
> - AArch64::LD1x3_4S, AArch64::LD1x3_2D
> - };
> - return SelectVLD(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vld1x4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
> - AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
> - AArch64::LD1x4_4S, AArch64::LD1x4_2D
> - };
> - return SelectVLD(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst1: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
> - AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
> - };
> - return SelectVST(Node, false, 1, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
> - AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
> - };
> - return SelectVST(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
> - AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
> - };
> - return SelectVST(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
> - AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
> - };
> - return SelectVST(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vst1x2: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
> - AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
> - AArch64::ST1x2_4S, AArch64::ST1x2_2D
> - };
> - return SelectVST(Node, false, 2, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vst1x3: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
> - AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
> - AArch64::ST1x3_4S, AArch64::ST1x3_2D
> - };
> - return SelectVST(Node, false, 3, Opcodes);
> - }
> - case Intrinsic::aarch64_neon_vst1x4: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
> - AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
> - AArch64::ST1x4_4S, AArch64::ST1x4_2D
> - };
> - return SelectVST(Node, false, 4, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld2lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
> - };
> - return SelectVLDSTLane(Node, true, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld3lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
> - };
> - return SelectVLDSTLane(Node, true, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vld4lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
> - };
> - return SelectVLDSTLane(Node, true, false, 4, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst2lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
> - };
> - return SelectVLDSTLane(Node, false, false, 2, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst3lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
> - };
> - return SelectVLDSTLane(Node, false, false, 3, Opcodes);
> - }
> - case Intrinsic::arm_neon_vst4lane: {
> - static const uint16_t Opcodes[] = {
> - AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
> - };
> - return SelectVLDSTLane(Node, false, false, 4, Opcodes);
> - }
> - } // End of switch IntNo
> - break;
> - } // End of case ISD::INTRINSIC_VOID and ISD::INTRINSIC_W_CHAIN
> - default:
> - break; // Let generic code handle it
> - }
> -
> - SDNode *ResNode = SelectCode(Node);
> -
> - DEBUG(dbgs() << "=> ";
> - if (ResNode == nullptr || ResNode == Node)
> - Node->dump(CurDAG);
> - else
> - ResNode->dump(CurDAG);
> - dbgs() << "\n");
> -
> - return ResNode;
> -}
> -
> -/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
> -/// instruction scheduling.
> -FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
> - CodeGenOpt::Level OptLevel) {
> - return new AArch64DAGToDAGISel(TM, OptLevel);
> -}
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (removed)
> @@ -1,5564 +0,0 @@
> -//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines the interfaces that AArch64 uses to lower LLVM code into a
> -// selection DAG.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64ISelLowering.h"
> -#include "AArch64MachineFunctionInfo.h"
> -#include "AArch64Subtarget.h"
> -#include "AArch64TargetMachine.h"
> -#include "AArch64TargetObjectFile.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/CodeGen/Analysis.h"
> -#include "llvm/CodeGen/CallingConvLower.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
> -#include "llvm/IR/CallingConv.h"
> -#include "llvm/Support/MathExtras.h"
> -
> -using namespace llvm;
> -
> -#define DEBUG_TYPE "aarch64-isel"
> -
> -static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
> - assert(TM.getSubtarget<AArch64Subtarget>().isTargetELF() &&
> - "unknown subtarget type");
> - return new AArch64ElfTargetObjectFile();
> -}
> -
> -AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
> - : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
> -
> - const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
> -
> - // SIMD compares set the entire lane's bits to 1
> - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
> -
> - // Scalar register <-> type mapping
> - addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
> - addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
> -
> - if (Subtarget->hasFPARMv8()) {
> - addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
> - addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
> - addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
> - }
> -
> - if (Subtarget->hasNEON()) {
> - // And the vectors
> - addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
> - addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
> - addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
> - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
> - addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
> - addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
> - }
> -
> - computeRegisterProperties();
> -
> - // We combine OR nodes for bitfield and NEON BSL operations.
> - setTargetDAGCombine(ISD::OR);
> -
> - setTargetDAGCombine(ISD::AND);
> - setTargetDAGCombine(ISD::SRA);
> - setTargetDAGCombine(ISD::SRL);
> - setTargetDAGCombine(ISD::SHL);
> -
> - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
> - setTargetDAGCombine(ISD::INTRINSIC_VOID);
> - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
> -
> - // AArch64 does not have i1 loads, or much of anything for i1 really.
> - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
> - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
> - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
> -
> - setStackPointerRegisterToSaveRestore(AArch64::XSP);
> - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
> - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
> - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
> -
> - // We'll lower globals to wrappers for selection.
> - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
> -
> - // A64 instructions have the comparison predicate attached to the user of the
> - // result, but having a separate comparison is valuable for matching.
> - setOperationAction(ISD::BR_CC, MVT::i32, Custom);
> - setOperationAction(ISD::BR_CC, MVT::i64, Custom);
> - setOperationAction(ISD::BR_CC, MVT::f32, Custom);
> - setOperationAction(ISD::BR_CC, MVT::f64, Custom);
> -
> - setOperationAction(ISD::SELECT, MVT::i32, Custom);
> - setOperationAction(ISD::SELECT, MVT::i64, Custom);
> - setOperationAction(ISD::SELECT, MVT::f32, Custom);
> - setOperationAction(ISD::SELECT, MVT::f64, Custom);
> -
> - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
> -
> - setOperationAction(ISD::BRCOND, MVT::Other, Custom);
> -
> - setOperationAction(ISD::SETCC, MVT::i32, Custom);
> - setOperationAction(ISD::SETCC, MVT::i64, Custom);
> - setOperationAction(ISD::SETCC, MVT::f32, Custom);
> - setOperationAction(ISD::SETCC, MVT::f64, Custom);
> -
> - setOperationAction(ISD::BR_JT, MVT::Other, Expand);
> - setOperationAction(ISD::JumpTable, MVT::i32, Custom);
> - setOperationAction(ISD::JumpTable, MVT::i64, Custom);
> -
> - setOperationAction(ISD::VASTART, MVT::Other, Custom);
> - setOperationAction(ISD::VACOPY, MVT::Other, Custom);
> - setOperationAction(ISD::VAEND, MVT::Other, Expand);
> - setOperationAction(ISD::VAARG, MVT::Other, Expand);
> -
> - setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
> - setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
> -
> - setOperationAction(ISD::ROTL, MVT::i32, Expand);
> - setOperationAction(ISD::ROTL, MVT::i64, Expand);
> -
> - setOperationAction(ISD::UREM, MVT::i32, Expand);
> - setOperationAction(ISD::UREM, MVT::i64, Expand);
> - setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
> - setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
> -
> - setOperationAction(ISD::SREM, MVT::i32, Expand);
> - setOperationAction(ISD::SREM, MVT::i64, Expand);
> - setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
> - setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
> -
> - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
> - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
> - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
> - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
> -
> - setOperationAction(ISD::CTPOP, MVT::i32, Expand);
> - setOperationAction(ISD::CTPOP, MVT::i64, Expand);
> -
> - // Legal floating-point operations.
> - setOperationAction(ISD::FABS, MVT::f32, Legal);
> - setOperationAction(ISD::FABS, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FCEIL, MVT::f32, Legal);
> - setOperationAction(ISD::FCEIL, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FNEG, MVT::f32, Legal);
> - setOperationAction(ISD::FNEG, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FRINT, MVT::f32, Legal);
> - setOperationAction(ISD::FRINT, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FSQRT, MVT::f32, Legal);
> - setOperationAction(ISD::FSQRT, MVT::f64, Legal);
> -
> - setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
> -
> - setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
> - setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
> - setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
> -
> - // Illegal floating-point operations.
> - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
> - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FCOS, MVT::f32, Expand);
> - setOperationAction(ISD::FCOS, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FEXP, MVT::f32, Expand);
> - setOperationAction(ISD::FEXP, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FEXP2, MVT::f32, Expand);
> - setOperationAction(ISD::FEXP2, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FLOG, MVT::f32, Expand);
> - setOperationAction(ISD::FLOG, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FLOG2, MVT::f32, Expand);
> - setOperationAction(ISD::FLOG2, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FLOG10, MVT::f32, Expand);
> - setOperationAction(ISD::FLOG10, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FPOW, MVT::f32, Expand);
> - setOperationAction(ISD::FPOW, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FPOWI, MVT::f32, Expand);
> - setOperationAction(ISD::FPOWI, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FREM, MVT::f32, Expand);
> - setOperationAction(ISD::FREM, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FSIN, MVT::f32, Expand);
> - setOperationAction(ISD::FSIN, MVT::f64, Expand);
> -
> - setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
> - setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
> -
> - // Virtually no operations on f128 are legal, but LLVM can't expand them when
> - // there's a valid register class, so we need custom lowering in most cases.
> - setOperationAction(ISD::FABS, MVT::f128, Expand);
> - setOperationAction(ISD::FADD, MVT::f128, Custom);
> - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
> - setOperationAction(ISD::FCOS, MVT::f128, Expand);
> - setOperationAction(ISD::FDIV, MVT::f128, Custom);
> - setOperationAction(ISD::FMA, MVT::f128, Expand);
> - setOperationAction(ISD::FMUL, MVT::f128, Custom);
> - setOperationAction(ISD::FNEG, MVT::f128, Expand);
> - setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
> - setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
> - setOperationAction(ISD::FPOW, MVT::f128, Expand);
> - setOperationAction(ISD::FREM, MVT::f128, Expand);
> - setOperationAction(ISD::FRINT, MVT::f128, Expand);
> - setOperationAction(ISD::FSIN, MVT::f128, Expand);
> - setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
> - setOperationAction(ISD::FSQRT, MVT::f128, Expand);
> - setOperationAction(ISD::FSUB, MVT::f128, Custom);
> - setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
> - setOperationAction(ISD::SETCC, MVT::f128, Custom);
> - setOperationAction(ISD::BR_CC, MVT::f128, Custom);
> - setOperationAction(ISD::SELECT, MVT::f128, Expand);
> - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
> - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
> -
> - // Lowering for many of the conversions is actually specified by the non-f128
> - // type. The LowerXXX function will be trivial when f128 isn't involved.
> - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
> - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
> - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
> -
> - // i128 shift operation support
> - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
> - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
> - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
> -
> - // This prevents LLVM from trying to compress double constants into a
> - // float constant-pool entry and then loading from there. It's of doubtful
> - // benefit for A64: we'd need an LDR followed by an FCVT, I believe.
> - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
> - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
> - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
> -
> - setTruncStoreAction(MVT::f128, MVT::f64, Expand);
> - setTruncStoreAction(MVT::f128, MVT::f32, Expand);
> - setTruncStoreAction(MVT::f128, MVT::f16, Expand);
> - setTruncStoreAction(MVT::f64, MVT::f32, Expand);
> - setTruncStoreAction(MVT::f64, MVT::f16, Expand);
> - setTruncStoreAction(MVT::f32, MVT::f16, Expand);
> -
> - setExceptionPointerRegister(AArch64::X0);
> - setExceptionSelectorRegister(AArch64::X1);
> -
> - if (Subtarget->hasNEON()) {
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
> - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
> -
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
> - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
> -
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
> - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
> -
> - setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
> - setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
> - setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
> - setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
> - setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
> - setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
> -
> - setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
> - setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
> - setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
> - setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
> - setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
> - setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
> - setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
> - setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
> - setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
> - setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
> - setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
> - setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
> - setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
> -
> - setOperationAction(ISD::SINT_TO_FP, MVT::v1i8, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v1i16, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v1i32, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
> - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
> -
> - setOperationAction(ISD::UINT_TO_FP, MVT::v1i8, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v1i16, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v1i32, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
> -
> - setOperationAction(ISD::FP_TO_SINT, MVT::v1i8, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v1i16, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v1i32, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Custom);
> -
> - setOperationAction(ISD::FP_TO_UINT, MVT::v1i8, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v1i16, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v1i32, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Custom);
> -
> - // Neon does not support vector divide/remainder operations except
> - // floating-point divide.
> - setOperationAction(ISD::SDIV, MVT::v1i8, Expand);
> - setOperationAction(ISD::SDIV, MVT::v8i8, Expand);
> - setOperationAction(ISD::SDIV, MVT::v16i8, Expand);
> - setOperationAction(ISD::SDIV, MVT::v1i16, Expand);
> - setOperationAction(ISD::SDIV, MVT::v4i16, Expand);
> - setOperationAction(ISD::SDIV, MVT::v8i16, Expand);
> - setOperationAction(ISD::SDIV, MVT::v1i32, Expand);
> - setOperationAction(ISD::SDIV, MVT::v2i32, Expand);
> - setOperationAction(ISD::SDIV, MVT::v4i32, Expand);
> - setOperationAction(ISD::SDIV, MVT::v1i64, Expand);
> - setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::UDIV, MVT::v1i8, Expand);
> - setOperationAction(ISD::UDIV, MVT::v8i8, Expand);
> - setOperationAction(ISD::UDIV, MVT::v16i8, Expand);
> - setOperationAction(ISD::UDIV, MVT::v1i16, Expand);
> - setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
> - setOperationAction(ISD::UDIV, MVT::v8i16, Expand);
> - setOperationAction(ISD::UDIV, MVT::v1i32, Expand);
> - setOperationAction(ISD::UDIV, MVT::v2i32, Expand);
> - setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
> - setOperationAction(ISD::UDIV, MVT::v1i64, Expand);
> - setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::SREM, MVT::v1i8, Expand);
> - setOperationAction(ISD::SREM, MVT::v8i8, Expand);
> - setOperationAction(ISD::SREM, MVT::v16i8, Expand);
> - setOperationAction(ISD::SREM, MVT::v1i16, Expand);
> - setOperationAction(ISD::SREM, MVT::v4i16, Expand);
> - setOperationAction(ISD::SREM, MVT::v8i16, Expand);
> - setOperationAction(ISD::SREM, MVT::v1i32, Expand);
> - setOperationAction(ISD::SREM, MVT::v2i32, Expand);
> - setOperationAction(ISD::SREM, MVT::v4i32, Expand);
> - setOperationAction(ISD::SREM, MVT::v1i64, Expand);
> - setOperationAction(ISD::SREM, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::UREM, MVT::v1i8, Expand);
> - setOperationAction(ISD::UREM, MVT::v8i8, Expand);
> - setOperationAction(ISD::UREM, MVT::v16i8, Expand);
> - setOperationAction(ISD::UREM, MVT::v1i16, Expand);
> - setOperationAction(ISD::UREM, MVT::v4i16, Expand);
> - setOperationAction(ISD::UREM, MVT::v8i16, Expand);
> - setOperationAction(ISD::UREM, MVT::v1i32, Expand);
> - setOperationAction(ISD::UREM, MVT::v2i32, Expand);
> - setOperationAction(ISD::UREM, MVT::v4i32, Expand);
> - setOperationAction(ISD::UREM, MVT::v1i64, Expand);
> - setOperationAction(ISD::UREM, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::FREM, MVT::v2f32, Expand);
> - setOperationAction(ISD::FREM, MVT::v4f32, Expand);
> - setOperationAction(ISD::FREM, MVT::v1f64, Expand);
> - setOperationAction(ISD::FREM, MVT::v2f64, Expand);
> -
> - setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
> - setOperationAction(ISD::SELECT, MVT::v16i8, Expand);
> - setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
> - setOperationAction(ISD::SELECT, MVT::v8i16, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2i64, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
> - setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
> - setOperationAction(ISD::SELECT, MVT::v2f64, Expand);
> -
> - setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom);
> - setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom);
> -
> - // Vector ExtLoad and TruncStore are expanded.
> - for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE;
> - I <= MVT::LAST_VECTOR_VALUETYPE; ++I) {
> - MVT VT = (MVT::SimpleValueType) I;
> - setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
> - setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
> - setLoadExtAction(ISD::EXTLOAD, VT, Expand);
> - for (unsigned II = MVT::FIRST_VECTOR_VALUETYPE;
> - II <= MVT::LAST_VECTOR_VALUETYPE; ++II) {
> - MVT VT1 = (MVT::SimpleValueType) II;
> - // A vector TruncStore pairs two vector types with the same number of
> - // elements but a narrower element size on the stored side.
> - if (VT.getVectorNumElements() == VT1.getVectorNumElements() &&
> - VT.getVectorElementType().getSizeInBits()
> - > VT1.getVectorElementType().getSizeInBits())
> - setTruncStoreAction(VT, VT1, Expand);
> - }
> -
> - setOperationAction(ISD::MULHS, VT, Expand);
> - setOperationAction(ISD::SMUL_LOHI, VT, Expand);
> - setOperationAction(ISD::MULHU, VT, Expand);
> - setOperationAction(ISD::UMUL_LOHI, VT, Expand);
> -
> - setOperationAction(ISD::BSWAP, VT, Expand);
> - }
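
The inner condition is easy to misread, so here it is restated standalone
(the struct is mine, illustrative only):

  // A vector truncstore pairs types with the same element count and a
  // strictly narrower stored element -- e.g. a v4i32 stored as v4i16.
  struct VecTy { unsigned NumElts, EltBits; };
  bool isVectorTruncStorePair(VecTy Src, VecTy Dst) {
    return Src.NumElts == Dst.NumElts && Src.EltBits > Dst.EltBits;
  }
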
> -
> - // There is no v1i64/v2i64 multiply, so expand v1i64/v2i64 to a GPR i64
> - // multiply.
> - // FIXME: For a v2i64 multiply, we copy VPR to GPR, do 2 i64 multiplies,
> - // and then copy back to VPR. This could instead be done with the following
> - // 3 NEON instructions:
> - // pmull v2.1q, v0.1d, v1.1d
> - // pmull2 v3.1q, v0.2d, v1.2d
> - // ins v2.d[1], v3.d[0]
> - // Since we can't currently verify that this transformation is correct, we
> - // leave the optimization for the future.
> - setOperationAction(ISD::MUL, MVT::v1i64, Expand);
> - setOperationAction(ISD::MUL, MVT::v2i64, Expand);
> -
> - setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
> - setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
> - setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
> - setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
> - setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
> - setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
> - setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
> - setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
> - setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
> - }
> -
> - setTargetDAGCombine(ISD::SIGN_EXTEND);
> - setTargetDAGCombine(ISD::VSELECT);
> -
> - MaskAndBranchFoldingIsLegal = true;
> -}
> -
> -EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
> - // It's reasonably important that this value matches the "natural" legal
> - // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
> - // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
> - if (!VT.isVector()) return MVT::i32;
> - return VT.changeVectorElementTypeToInteger();
> -}
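
Concretely, that rule sends scalar compares to i32 and keeps a vector's
shape while switching its lanes to integers; a small sketch (the types are
mine):

  // setcc result types: scalar -> i32; vector -> same element count with
  // integer lanes of the same width (v2f64 -> v2i64, v4f32 -> v4i32).
  struct SimpleVT { bool IsVector; unsigned NumElts, EltBits; bool IsFP; };
  SimpleVT setCCResultTypeSketch(SimpleVT VT) {
    if (!VT.IsVector)
      return SimpleVT{false, 1, 32, false}; // plain i32
    VT.IsFP = false; // element count and width are unchanged
    return VT;
  }
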
> -
> -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
> - unsigned &LdrOpc,
> - unsigned &StrOpc) {
> - static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
> - AArch64::LDXR_word, AArch64::LDXR_dword};
> - static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
> - AArch64::LDAXR_word, AArch64::LDAXR_dword};
> - static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
> - AArch64::STXR_word, AArch64::STXR_dword};
> - static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword,
> - AArch64::STLXR_word, AArch64::STLXR_dword};
> -
> - const unsigned *LoadOps, *StoreOps;
> - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
> - LoadOps = LoadAcqs;
> - else
> - LoadOps = LoadBares;
> -
> - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
> - StoreOps = StoreRels;
> - else
> - StoreOps = StoreBares;
> -
> - assert(isPowerOf2_32(Size) && Size <= 8 &&
> - "unsupported size for atomic binary op!");
> -
> - LdrOpc = LoadOps[Log2_32(Size)];
> - StrOpc = StoreOps[Log2_32(Size)];
> -}
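
The ordering split above is the usual one for exclusive pairs; restated
standalone (the enum names are mine, mirroring the orderings used above):

  // Acquire semantics go on the exclusive load (LDAXR), release semantics
  // on the exclusive store (STLXR); acq_rel and seq_cst need both,
  // monotonic needs neither.
  enum Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };
  struct ExclPair { bool AcquireLoad, ReleaseStore; };
  ExclPair classifyExclusive(Ordering Ord) {
    ExclPair P;
    P.AcquireLoad  = Ord == Acquire || Ord == AcquireRelease || Ord == SeqCst;
    P.ReleaseStore = Ord == Release || Ord == AcquireRelease || Ord == SeqCst;
    return P;
  }
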
> -
> -// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really
> -// have a value type mapped; both are defined as MVT::untyped. Without
> -// knowing the MVT, MachineLICM::getRegisterClassIDAndCost fails to compute
> -// the register pressure correctly.
> -std::pair<const TargetRegisterClass*, uint8_t>
> -AArch64TargetLowering::findRepresentativeClass(MVT VT) const{
> - const TargetRegisterClass *RRC = nullptr;
> - uint8_t Cost = 1;
> - switch (VT.SimpleTy) {
> - default:
> - return TargetLowering::findRepresentativeClass(VT);
> - case MVT::v4i64:
> - RRC = &AArch64::QPairRegClass;
> - Cost = 2;
> - break;
> - case MVT::v8i64:
> - RRC = &AArch64::QQuadRegClass;
> - Cost = 4;
> - break;
> - }
> - return std::make_pair(RRC, Cost);
> -}
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned Size,
> - unsigned BinOpcode) const {
> - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> -
> - const BasicBlock *LLVM_BB = BB->getBasicBlock();
> - MachineFunction *MF = BB->getParent();
> - MachineFunction::iterator It = BB;
> - ++It;
> -
> - unsigned dest = MI->getOperand(0).getReg();
> - unsigned ptr = MI->getOperand(1).getReg();
> - unsigned incr = MI->getOperand(2).getReg();
> - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
> - DebugLoc dl = MI->getDebugLoc();
> -
> - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> -
> - unsigned ldrOpc, strOpc;
> - getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
> -
> - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, loopMBB);
> - MF->insert(It, exitMBB);
> -
> - // Transfer the remainder of BB and its successor edges to exitMBB.
> - exitMBB->splice(exitMBB->begin(), BB,
> - std::next(MachineBasicBlock::iterator(MI)), BB->end());
> - exitMBB->transferSuccessorsAndUpdatePHIs(BB);
> -
> - const TargetRegisterClass *TRC
> - = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
> - unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
> -
> - // thisMBB:
> - // ...
> - // fallthrough --> loopMBB
> - BB->addSuccessor(loopMBB);
> -
> - // loopMBB:
> - // ldxr dest, ptr
> - // <binop> scratch, dest, incr
> - // stxr stxr_status, scratch, ptr
> - // cbnz stxr_status, loopMBB
> - // fallthrough --> exitMBB
> - BB = loopMBB;
> - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
> - if (BinOpcode) {
> - // All arithmetic operations we'll be creating are designed to take an extra
> - // shift or extend operand, which we can conveniently set to zero.
> -
> - // Operand order needs to go the other way for NAND.
> - if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
> - BuildMI(BB, dl, TII->get(BinOpcode), scratch)
> - .addReg(incr).addReg(dest).addImm(0);
> - else
> - BuildMI(BB, dl, TII->get(BinOpcode), scratch)
> - .addReg(dest).addReg(incr).addImm(0);
> - }
> -
> - // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
> - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
> - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
> -
> - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
> - BuildMI(BB, dl, TII->get(AArch64::CBNZw))
> - .addReg(stxr_status).addMBB(loopMBB);
> -
> - BB->addSuccessor(loopMBB);
> - BB->addSuccessor(exitMBB);
> -
> - // exitMBB:
> - // ...
> - BB = exitMBB;
> -
> - MI->eraseFromParent(); // The instruction is gone now.
> -
> - return BB;
> -}
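
The block structure built above is the classic load-exclusive /
store-exclusive retry loop. A conceptual C++ analogue, with a
compare-exchange standing in for the LDXR/STXR pair (illustrative only):

  #include <atomic>
  // For, say, an atomic add: retry until the exclusive store succeeds, and
  // yield the value seen before the update (the 'dest' register above).
  unsigned atomicFetchAddSketch(std::atomic<unsigned> &Ptr, unsigned Incr) {
    unsigned Dest = Ptr.load(std::memory_order_relaxed);   // ldxr
    while (!Ptr.compare_exchange_weak(Dest, Dest + Incr,   // stxr
                                      std::memory_order_relaxed))
      ;                                                    // cbnz -> retry
    return Dest;
  }
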
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
> - MachineBasicBlock *BB,
> - unsigned Size,
> - unsigned CmpOp,
> - A64CC::CondCodes Cond) const {
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> -
> - const BasicBlock *LLVM_BB = BB->getBasicBlock();
> - MachineFunction *MF = BB->getParent();
> - MachineFunction::iterator It = BB;
> - ++It;
> -
> - unsigned dest = MI->getOperand(0).getReg();
> - unsigned ptr = MI->getOperand(1).getReg();
> - unsigned incr = MI->getOperand(2).getReg();
> - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
> -
> - unsigned oldval = dest;
> - DebugLoc dl = MI->getDebugLoc();
> -
> - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> - const TargetRegisterClass *TRC, *TRCsp;
> - if (Size == 8) {
> - TRC = &AArch64::GPR64RegClass;
> - TRCsp = &AArch64::GPR64xspRegClass;
> - } else {
> - TRC = &AArch64::GPR32RegClass;
> - TRCsp = &AArch64::GPR32wspRegClass;
> - }
> -
> - unsigned ldrOpc, strOpc;
> - getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
> -
> - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, loopMBB);
> - MF->insert(It, exitMBB);
> -
> - // Transfer the remainder of BB and its successor edges to exitMBB.
> - exitMBB->splice(exitMBB->begin(), BB,
> - std::next(MachineBasicBlock::iterator(MI)), BB->end());
> - exitMBB->transferSuccessorsAndUpdatePHIs(BB);
> -
> - unsigned scratch = MRI.createVirtualRegister(TRC);
> - MRI.constrainRegClass(scratch, TRCsp);
> -
> - // thisMBB:
> - // ...
> - // fallthrough --> loopMBB
> - BB->addSuccessor(loopMBB);
> -
> - // loopMBB:
> - // ldxr dest, ptr
> - // cmp incr, dest (, sign extend if necessary)
> - // csel scratch, dest, incr, cond
> - // stxr stxr_status, scratch, ptr
> - // cbnz stxr_status, loopMBB
> - // fallthrough --> exitMBB
> - BB = loopMBB;
> - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
> -
> - // Build compare and cmov instructions.
> - MRI.constrainRegClass(incr, TRCsp);
> - BuildMI(BB, dl, TII->get(CmpOp))
> - .addReg(incr).addReg(oldval).addImm(0);
> -
> - BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
> - scratch)
> - .addReg(oldval).addReg(incr).addImm(Cond);
> -
> - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
> - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
> -
> - BuildMI(BB, dl, TII->get(strOpc), stxr_status)
> - .addReg(scratch).addReg(ptr);
> - BuildMI(BB, dl, TII->get(AArch64::CBNZw))
> - .addReg(stxr_status).addMBB(loopMBB);
> -
> - BB->addSuccessor(loopMBB);
> - BB->addSuccessor(exitMBB);
> -
> - // exitMBB:
> - // ...
> - BB = exitMBB;
> -
> - MI->eraseFromParent(); // The instruction is gone now.
> -
> - return BB;
> -}
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
> - MachineBasicBlock *BB,
> - unsigned Size) const {
> - unsigned dest = MI->getOperand(0).getReg();
> - unsigned ptr = MI->getOperand(1).getReg();
> - unsigned oldval = MI->getOperand(2).getReg();
> - unsigned newval = MI->getOperand(3).getReg();
> - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> - DebugLoc dl = MI->getDebugLoc();
> -
> - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> - const TargetRegisterClass *TRCsp;
> - TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
> -
> - unsigned ldrOpc, strOpc;
> - getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
> -
> - MachineFunction *MF = BB->getParent();
> - const BasicBlock *LLVM_BB = BB->getBasicBlock();
> - MachineFunction::iterator It = BB;
> - ++It; // insert the new blocks after the current block
> -
> - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, loop1MBB);
> - MF->insert(It, loop2MBB);
> - MF->insert(It, exitMBB);
> -
> - // Transfer the remainder of BB and its successor edges to exitMBB.
> - exitMBB->splice(exitMBB->begin(), BB,
> - std::next(MachineBasicBlock::iterator(MI)), BB->end());
> - exitMBB->transferSuccessorsAndUpdatePHIs(BB);
> -
> - // thisMBB:
> - // ...
> - // fallthrough --> loop1MBB
> - BB->addSuccessor(loop1MBB);
> -
> - // loop1MBB:
> - // ldxr dest, [ptr]
> - // cmp dest, oldval
> - // b.ne exitMBB
> - BB = loop1MBB;
> - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
> -
> - unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
> - MRI.constrainRegClass(dest, TRCsp);
> - BuildMI(BB, dl, TII->get(CmpOp))
> - .addReg(dest).addReg(oldval).addImm(0);
> - BuildMI(BB, dl, TII->get(AArch64::Bcc))
> - .addImm(A64CC::NE).addMBB(exitMBB);
> - BB->addSuccessor(loop2MBB);
> - BB->addSuccessor(exitMBB);
> -
> - // loop2MBB:
> - // strex stxr_status, newval, [ptr]
> - // cbnz stxr_status, loop1MBB
> - BB = loop2MBB;
> - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
> - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
> -
> - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
> - BuildMI(BB, dl, TII->get(AArch64::CBNZw))
> - .addReg(stxr_status).addMBB(loop1MBB);
> - BB->addSuccessor(loop1MBB);
> - BB->addSuccessor(exitMBB);
> -
> - // exitMBB:
> - // ...
> - BB = exitMBB;
> -
> - MI->eraseFromParent(); // The instruction is gone now.
> -
> - return BB;
> -}
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
> - MachineBasicBlock *MBB) const {
> - // We materialise the F128CSEL pseudo-instruction using conditional branches
> - // and loads, giving an instruction sequence like:
> - // str q0, [sp]
> - // b.ne IfTrue
> - // b Finish
> - // IfTrue:
> - // str q1, [sp]
> - // Finish:
> - // ldr q0, [sp]
> - //
> - // Using virtual registers would probably not be beneficial since COPY
> - // instructions are expensive for f128 (there's no actual instruction to
> - // implement them).
> - //
> - // An alternative would be to do an integer-CSEL on some address. E.g.:
> - // mov x0, sp
> - // add x1, sp, #16
> - // str q0, [x0]
> - // str q1, [x1]
> - // csel x0, x0, x1, ne
> - // ldr q0, [x0]
> - //
> - // It's unclear which approach is actually optimal.
> - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
> - MachineFunction *MF = MBB->getParent();
> - const BasicBlock *LLVM_BB = MBB->getBasicBlock();
> - DebugLoc DL = MI->getDebugLoc();
> - MachineFunction::iterator It = MBB;
> - ++It;
> -
> - unsigned DestReg = MI->getOperand(0).getReg();
> - unsigned IfTrueReg = MI->getOperand(1).getReg();
> - unsigned IfFalseReg = MI->getOperand(2).getReg();
> - unsigned CondCode = MI->getOperand(3).getImm();
> - bool NZCVKilled = MI->getOperand(4).isKill();
> -
> - MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
> - MF->insert(It, TrueBB);
> - MF->insert(It, EndBB);
> -
> - // Transfer rest of current basic-block to EndBB
> - EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
> - MBB->end());
> - EndBB->transferSuccessorsAndUpdatePHIs(MBB);
> -
> - // We need somewhere to store the f128 value.
> - int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
> -
> - // [... start of incoming MBB ...]
> - // str qIFFALSE, [sp]
> - // b.cc IfTrue
> - // b Done
> - BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
> - .addReg(IfFalseReg)
> - .addFrameIndex(ScratchFI)
> - .addImm(0);
> - BuildMI(MBB, DL, TII->get(AArch64::Bcc))
> - .addImm(CondCode)
> - .addMBB(TrueBB);
> - BuildMI(MBB, DL, TII->get(AArch64::Bimm))
> - .addMBB(EndBB);
> - MBB->addSuccessor(TrueBB);
> - MBB->addSuccessor(EndBB);
> -
> - if (!NZCVKilled) {
> - // NZCV is live-through TrueBB.
> - TrueBB->addLiveIn(AArch64::NZCV);
> - EndBB->addLiveIn(AArch64::NZCV);
> - }
> -
> - // IfTrue:
> - // str qIFTRUE, [sp]
> - BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
> - .addReg(IfTrueReg)
> - .addFrameIndex(ScratchFI)
> - .addImm(0);
> -
> - // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
> - // blocks.
> - TrueBB->addSuccessor(EndBB);
> -
> - // Done:
> - // ldr qDEST, [sp]
> - // [... rest of incoming MBB ...]
> - MachineInstr *StartOfEnd = EndBB->begin();
> - BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
> - .addFrameIndex(ScratchFI)
> - .addImm(0);
> -
> - MI->eraseFromParent();
> - return EndBB;
> -}
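
In C-like pseudocode the emitted sequence computes the following (a
sketch only: the real code uses an anonymous 16-byte spill slot rather
than a named scratch variable):

    // dest = cond ? iftrue : iffalse, for f128 values
    scratch = iffalse;        // str qIFFALSE, [sp, #off]
    if (cond)
      scratch = iftrue;       // str qIFTRUE, [sp, #off]
    dest = scratch;           // ldr qDEST, [sp, #off]
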
> -
> -MachineBasicBlock *
> -AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
> - MachineBasicBlock *MBB) const {
> - switch (MI->getOpcode()) {
> - default: llvm_unreachable("Unhandled instruction with custom inserter");
> - case AArch64::F128CSEL:
> - return EmitF128CSEL(MI, MBB);
> - case AArch64::ATOMIC_LOAD_ADD_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
> - case AArch64::ATOMIC_LOAD_ADD_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
> - case AArch64::ATOMIC_LOAD_ADD_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
> - case AArch64::ATOMIC_LOAD_ADD_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_SUB_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
> - case AArch64::ATOMIC_LOAD_SUB_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
> - case AArch64::ATOMIC_LOAD_SUB_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
> - case AArch64::ATOMIC_LOAD_SUB_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_AND_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
> - case AArch64::ATOMIC_LOAD_AND_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
> - case AArch64::ATOMIC_LOAD_AND_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
> - case AArch64::ATOMIC_LOAD_AND_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_OR_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
> - case AArch64::ATOMIC_LOAD_OR_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
> - case AArch64::ATOMIC_LOAD_OR_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
> - case AArch64::ATOMIC_LOAD_OR_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_XOR_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
> - case AArch64::ATOMIC_LOAD_XOR_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
> - case AArch64::ATOMIC_LOAD_XOR_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
> - case AArch64::ATOMIC_LOAD_XOR_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_NAND_I8:
> - return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
> - case AArch64::ATOMIC_LOAD_NAND_I16:
> - return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
> - case AArch64::ATOMIC_LOAD_NAND_I32:
> - return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
> - case AArch64::ATOMIC_LOAD_NAND_I64:
> - return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
> -
> - case AArch64::ATOMIC_LOAD_MIN_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
> - case AArch64::ATOMIC_LOAD_MIN_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
> - case AArch64::ATOMIC_LOAD_MIN_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
> - case AArch64::ATOMIC_LOAD_MIN_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
> -
> - case AArch64::ATOMIC_LOAD_MAX_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
> - case AArch64::ATOMIC_LOAD_MAX_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
> - case AArch64::ATOMIC_LOAD_MAX_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
> - case AArch64::ATOMIC_LOAD_MAX_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
> -
> - case AArch64::ATOMIC_LOAD_UMIN_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
> - case AArch64::ATOMIC_LOAD_UMIN_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
> - case AArch64::ATOMIC_LOAD_UMIN_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
> - case AArch64::ATOMIC_LOAD_UMIN_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
> -
> - case AArch64::ATOMIC_LOAD_UMAX_I8:
> - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
> - case AArch64::ATOMIC_LOAD_UMAX_I16:
> - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
> - case AArch64::ATOMIC_LOAD_UMAX_I32:
> - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
> - case AArch64::ATOMIC_LOAD_UMAX_I64:
> - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
> -
> - case AArch64::ATOMIC_SWAP_I8:
> - return emitAtomicBinary(MI, MBB, 1, 0);
> - case AArch64::ATOMIC_SWAP_I16:
> - return emitAtomicBinary(MI, MBB, 2, 0);
> - case AArch64::ATOMIC_SWAP_I32:
> - return emitAtomicBinary(MI, MBB, 4, 0);
> - case AArch64::ATOMIC_SWAP_I64:
> - return emitAtomicBinary(MI, MBB, 8, 0);
> -
> - case AArch64::ATOMIC_CMP_SWAP_I8:
> - return emitAtomicCmpSwap(MI, MBB, 1);
> - case AArch64::ATOMIC_CMP_SWAP_I16:
> - return emitAtomicCmpSwap(MI, MBB, 2);
> - case AArch64::ATOMIC_CMP_SWAP_I32:
> - return emitAtomicCmpSwap(MI, MBB, 4);
> - case AArch64::ATOMIC_CMP_SWAP_I64:
> - return emitAtomicCmpSwap(MI, MBB, 8);
> - }
> -}
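
As a concrete example of what feeds this switch, IR such as the
following (typed-pointer syntax of the period)

    %old = atomicrmw add i32* %p, i32 %v seq_cst

is selected to the ATOMIC_LOAD_ADD_I32 pseudo, which the hook above
hands to emitAtomicBinary for expansion into an ldxr/add/stxr retry
loop along the lines shown earlier.
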
> -
> -const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
> - switch (Opcode) {
> - case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
> - case AArch64ISD::Call: return "AArch64ISD::Call";
> - case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
> - case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
> - case AArch64ISD::BFI: return "AArch64ISD::BFI";
> - case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
> - case AArch64ISD::Ret: return "AArch64ISD::Ret";
> - case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
> - case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
> - case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
> - case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
> - case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
> - case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
> - case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
> - case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
> -
> - case AArch64ISD::NEON_MOVIMM:
> - return "AArch64ISD::NEON_MOVIMM";
> - case AArch64ISD::NEON_MVNIMM:
> - return "AArch64ISD::NEON_MVNIMM";
> - case AArch64ISD::NEON_FMOVIMM:
> - return "AArch64ISD::NEON_FMOVIMM";
> - case AArch64ISD::NEON_CMP:
> - return "AArch64ISD::NEON_CMP";
> - case AArch64ISD::NEON_CMPZ:
> - return "AArch64ISD::NEON_CMPZ";
> - case AArch64ISD::NEON_TST:
> - return "AArch64ISD::NEON_TST";
> - case AArch64ISD::NEON_QSHLs:
> - return "AArch64ISD::NEON_QSHLs";
> - case AArch64ISD::NEON_QSHLu:
> - return "AArch64ISD::NEON_QSHLu";
> - case AArch64ISD::NEON_VDUP:
> - return "AArch64ISD::NEON_VDUP";
> - case AArch64ISD::NEON_VDUPLANE:
> - return "AArch64ISD::NEON_VDUPLANE";
> - case AArch64ISD::NEON_REV16:
> - return "AArch64ISD::NEON_REV16";
> - case AArch64ISD::NEON_REV32:
> - return "AArch64ISD::NEON_REV32";
> - case AArch64ISD::NEON_REV64:
> - return "AArch64ISD::NEON_REV64";
> - case AArch64ISD::NEON_UZP1:
> - return "AArch64ISD::NEON_UZP1";
> - case AArch64ISD::NEON_UZP2:
> - return "AArch64ISD::NEON_UZP2";
> - case AArch64ISD::NEON_ZIP1:
> - return "AArch64ISD::NEON_ZIP1";
> - case AArch64ISD::NEON_ZIP2:
> - return "AArch64ISD::NEON_ZIP2";
> - case AArch64ISD::NEON_TRN1:
> - return "AArch64ISD::NEON_TRN1";
> - case AArch64ISD::NEON_TRN2:
> - return "AArch64ISD::NEON_TRN2";
> - case AArch64ISD::NEON_LD1_UPD:
> - return "AArch64ISD::NEON_LD1_UPD";
> - case AArch64ISD::NEON_LD2_UPD:
> - return "AArch64ISD::NEON_LD2_UPD";
> - case AArch64ISD::NEON_LD3_UPD:
> - return "AArch64ISD::NEON_LD3_UPD";
> - case AArch64ISD::NEON_LD4_UPD:
> - return "AArch64ISD::NEON_LD4_UPD";
> - case AArch64ISD::NEON_ST1_UPD:
> - return "AArch64ISD::NEON_ST1_UPD";
> - case AArch64ISD::NEON_ST2_UPD:
> - return "AArch64ISD::NEON_ST2_UPD";
> - case AArch64ISD::NEON_ST3_UPD:
> - return "AArch64ISD::NEON_ST3_UPD";
> - case AArch64ISD::NEON_ST4_UPD:
> - return "AArch64ISD::NEON_ST4_UPD";
> - case AArch64ISD::NEON_LD1x2_UPD:
> - return "AArch64ISD::NEON_LD1x2_UPD";
> - case AArch64ISD::NEON_LD1x3_UPD:
> - return "AArch64ISD::NEON_LD1x3_UPD";
> - case AArch64ISD::NEON_LD1x4_UPD:
> - return "AArch64ISD::NEON_LD1x4_UPD";
> - case AArch64ISD::NEON_ST1x2_UPD:
> - return "AArch64ISD::NEON_ST1x2_UPD";
> - case AArch64ISD::NEON_ST1x3_UPD:
> - return "AArch64ISD::NEON_ST1x3_UPD";
> - case AArch64ISD::NEON_ST1x4_UPD:
> - return "AArch64ISD::NEON_ST1x4_UPD";
> - case AArch64ISD::NEON_LD2DUP:
> - return "AArch64ISD::NEON_LD2DUP";
> - case AArch64ISD::NEON_LD3DUP:
> - return "AArch64ISD::NEON_LD3DUP";
> - case AArch64ISD::NEON_LD4DUP:
> - return "AArch64ISD::NEON_LD4DUP";
> - case AArch64ISD::NEON_LD2DUP_UPD:
> - return "AArch64ISD::NEON_LD2DUP_UPD";
> - case AArch64ISD::NEON_LD3DUP_UPD:
> - return "AArch64ISD::NEON_LD3DUP_UPD";
> - case AArch64ISD::NEON_LD4DUP_UPD:
> - return "AArch64ISD::NEON_LD4DUP_UPD";
> - case AArch64ISD::NEON_LD2LN_UPD:
> - return "AArch64ISD::NEON_LD2LN_UPD";
> - case AArch64ISD::NEON_LD3LN_UPD:
> - return "AArch64ISD::NEON_LD3LN_UPD";
> - case AArch64ISD::NEON_LD4LN_UPD:
> - return "AArch64ISD::NEON_LD4LN_UPD";
> - case AArch64ISD::NEON_ST2LN_UPD:
> - return "AArch64ISD::NEON_ST2LN_UPD";
> - case AArch64ISD::NEON_ST3LN_UPD:
> - return "AArch64ISD::NEON_ST3LN_UPD";
> - case AArch64ISD::NEON_ST4LN_UPD:
> - return "AArch64ISD::NEON_ST4LN_UPD";
> - case AArch64ISD::NEON_VEXTRACT:
> - return "AArch64ISD::NEON_VEXTRACT";
> - default:
> - return nullptr;
> - }
> -}
> -
> -static const MCPhysReg AArch64FPRArgRegs[] = {
> - AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
> - AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
> -};
> -static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
> -
> -static const MCPhysReg AArch64ArgRegs[] = {
> - AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
> - AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
> -};
> -static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
> -
> -static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
> - CCValAssign::LocInfo LocInfo,
> - ISD::ArgFlagsTy ArgFlags, CCState &State) {
> - // Mark all remaining general purpose registers as allocated. We don't
> - // backtrack: if (for example) an i128 gets put on the stack, no subsequent
> - // i64 will go in registers (C.11).
> - for (unsigned i = 0; i < NumArgRegs; ++i)
> - State.AllocateReg(AArch64ArgRegs[i]);
> -
> - return false;
> -}
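
A hypothetical signature showing rule C.11 in action:

    void f(long a, long b, long c, long d, long e, long g, long h,
           __int128 big,  // needs a register pair; only x7 left -> stack
           long tail);    // also stack, per C.11, though x7 is still free
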
> -
> -#include "AArch64GenCallingConv.inc"
> -
> -CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
> - switch (CC) {
> - default: llvm_unreachable("Unsupported calling convention");
> - case CallingConv::Fast:
> - case CallingConv::C:
> - return CC_A64_APCS;
> - }
> -}
> -
> -void
> -AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
> - SDLoc DL, SDValue &Chain) const {
> - MachineFunction &MF = DAG.getMachineFunction();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> -
> - SmallVector<SDValue, 8> MemOps;
> -
> - unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
> - NumArgRegs);
> - unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
> - NumFPRArgRegs);
> -
> - unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
> - int GPRIdx = 0;
> - if (GPRSaveSize != 0) {
> - GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
> -
> - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
> -
> - for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
> - unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
> - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
> - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
> - MachinePointerInfo::getStack(i * 8),
> - false, false, 0);
> - MemOps.push_back(Store);
> - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
> - DAG.getConstant(8, getPointerTy()));
> - }
> - }
> -
> - if (getSubtarget()->hasFPARMv8()) {
> - unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
> - int FPRIdx = 0;
> - // According to the AArch64 Procedure Call Standard, section B.1/B.3, we
> - // can omit a register save area if we know we'll never use registers of
> - // that class.
> - if (FPRSaveSize != 0) {
> - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
> -
> - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
> -
> - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
> - unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
> - &AArch64::FPR128RegClass);
> - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
> - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
> - MachinePointerInfo::getStack(i * 16),
> - false, false, 0);
> - MemOps.push_back(Store);
> - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
> - DAG.getConstant(16, getPointerTy()));
> - }
> - }
> - FuncInfo->setVariadicFPRIdx(FPRIdx);
> - FuncInfo->setVariadicFPRSize(FPRSaveSize);
> - }
> -
> - unsigned StackOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), 8);
> - int StackIdx = MFI->CreateFixedObject(8, StackOffset, true);
> -
> - FuncInfo->setVariadicStackIdx(StackIdx);
> - FuncInfo->setVariadicGPRIdx(GPRIdx);
> - FuncInfo->setVariadicGPRSize(GPRSaveSize);
> -
> - if (!MemOps.empty()) {
> - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
> - }
> -}
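
For a callee like this sketch, the code above dumps the as-yet
unallocated x-registers (and, when FP is available, q-registers) into
the save areas so that va_arg can walk them later:

    #include <stdarg.h>
    int sum(int n, ...) {     // n is in w0, so x1..x7 get saved
      va_list ap;
      va_start(ap, n);
      int total = 0;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, int);
      va_end(ap);
      return total;
    }
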
> -
> -SDValue
> -AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const {
> - MachineFunction &MF = DAG.getMachineFunction();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
> -
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> - CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
> -
> - SmallVector<SDValue, 16> ArgValues;
> -
> - SDValue ArgValue;
> - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
> - CCValAssign &VA = ArgLocs[i];
> - ISD::ArgFlagsTy Flags = Ins[i].Flags;
> -
> - if (Flags.isByVal()) {
> - // Byval is used for small structs and HFAs in the PCS, but the system
> - // should work in a non-compliant manner for larger structs.
> - EVT PtrTy = getPointerTy();
> - int Size = Flags.getByValSize();
> - unsigned NumRegs = (Size + 7) / 8;
> -
> - uint32_t BEAlign = 0;
> - if (Size < 8 && !getSubtarget()->isLittle())
> - BEAlign = 8 - Size;
> - unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
> - VA.getLocMemOffset() + BEAlign,
> - false);
> - SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
> - InVals.push_back(FrameIdxN);
> -
> - continue;
> - } else if (VA.isRegLoc()) {
> - MVT RegVT = VA.getLocVT();
> - const TargetRegisterClass *RC = getRegClassFor(RegVT);
> - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
> -
> - ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
> - } else { // !VA.isRegLoc()
> - assert(VA.isMemLoc());
> -
> - int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
> - VA.getLocMemOffset(), true);
> -
> - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
> - ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
> - MachinePointerInfo::getFixedStack(FI),
> - false, false, false, 0);
> - }
> -
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info!");
> - case CCValAssign::Full: break;
> - case CCValAssign::BCvt:
> - ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
> - break;
> - case CCValAssign::SExt:
> - case CCValAssign::ZExt:
> - case CCValAssign::AExt:
> - case CCValAssign::FPExt: {
> - unsigned DestSize = VA.getValVT().getSizeInBits();
> - unsigned DestSubReg;
> -
> - switch (DestSize) {
> - case 8: DestSubReg = AArch64::sub_8; break;
> - case 16: DestSubReg = AArch64::sub_16; break;
> - case 32: DestSubReg = AArch64::sub_32; break;
> - case 64: DestSubReg = AArch64::sub_64; break;
> - default: llvm_unreachable("Unexpected argument promotion");
> - }
> -
> - ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
> - VA.getValVT(), ArgValue,
> - DAG.getTargetConstant(DestSubReg, MVT::i32)),
> - 0);
> - break;
> - }
> - }
> -
> - InVals.push_back(ArgValue);
> - }
> -
> - if (isVarArg)
> - SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
> -
> - unsigned StackArgSize = CCInfo.getNextStackOffset();
> - if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
> - // This is a non-standard ABI so by fiat I say we're allowed to make full
> - // use of the stack area to be popped, which must be aligned to 16 bytes in
> - // any case:
> - StackArgSize = RoundUpToAlignment(StackArgSize, 16);
> -
> - // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
> - // a multiple of 16.
> - FuncInfo->setArgumentStackToRestore(StackArgSize);
> -
> - // This realignment carries over to the available bytes below. Our own
> - // callers will guarantee the space is free by giving an aligned value to
> - // CALLSEQ_START.
> - }
> - // Even if we're not expected to free up the space, it's useful to know how
> - // much is there while considering tail calls (because we can reuse it).
> - FuncInfo->setBytesInStackArgArea(StackArgSize);
> -
> - return Chain;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerReturn(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - SDLoc dl, SelectionDAG &DAG) const {
> - // CCValAssign - represent the assignment of the return value to a location.
> - SmallVector<CCValAssign, 16> RVLocs;
> -
> - // CCState - Info about the registers and stack slots.
> - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs, *DAG.getContext());
> -
> - // Analyze outgoing return values.
> - CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
> -
> - SDValue Flag;
> - SmallVector<SDValue, 4> RetOps(1, Chain);
> -
> - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
> - // PCS: "If the type, T, of the result of a function is such that
> - // void func(T arg) would require that arg be passed as a value in a
> - // register (or set of registers) according to the rules in 5.4, then the
> - // result is returned in the same registers as would be used for such an
> - // argument.
> - //
> - // Otherwise, the caller shall reserve a block of memory of sufficient
> - // size and alignment to hold the result. The address of the memory block
> - // shall be passed as an additional argument to the function in x8."
> - //
> - // This is implemented in two places. The register-return values are dealt
> - // with here; more complex returns are passed as an sret parameter, which
> - // means we don't have to worry about them during the actual return.
> - CCValAssign &VA = RVLocs[i];
> - assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
> -
> - SDValue Arg = OutVals[i];
> -
> - // There's no convenient note in the ABI about this as there is for normal
> - // arguments, but it says return values are passed in the same registers as
> - // an argument would be. I believe that includes the comments about
> - // unspecified higher bits, putting the burden of widening on the *caller*
> - // for return values.
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info");
> - case CCValAssign::Full: break;
> - case CCValAssign::SExt:
> - case CCValAssign::ZExt:
> - case CCValAssign::AExt:
> - // Floating-point values should only be extended when they're going into
> - // memory, which can't happen here so an integer extend is acceptable.
> - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
> - break;
> - case CCValAssign::BCvt:
> - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
> - break;
> - }
> -
> - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
> - Flag = Chain.getValue(1);
> - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
> - }
> -
> - RetOps[0] = Chain; // Update chain.
> -
> - // Add the flag if we have it.
> - if (Flag.getNode())
> - RetOps.push_back(Flag);
> -
> - return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, RetOps);
> -}
> -
> -unsigned AArch64TargetLowering::getByValTypeAlignment(Type *Ty) const {
> - // This is a new backend. For anything more precise than this a FE should
> - // set an explicit alignment.
> - return 4;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
> - SmallVectorImpl<SDValue> &InVals) const {
> - SelectionDAG &DAG = CLI.DAG;
> - SDLoc &dl = CLI.DL;
> - SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
> - SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
> - SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
> - SDValue Chain = CLI.Chain;
> - SDValue Callee = CLI.Callee;
> - bool &IsTailCall = CLI.IsTailCall;
> - CallingConv::ID CallConv = CLI.CallConv;
> - bool IsVarArg = CLI.IsVarArg;
> -
> - MachineFunction &MF = DAG.getMachineFunction();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
> - bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
> - bool IsSibCall = false;
> -
> - if (IsTailCall) {
> - IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
> - IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
> - Outs, OutVals, Ins, DAG);
> -
> - if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
> - report_fatal_error("failed to perform tail call elimination on a call "
> - "site marked musttail");
> -
> - // A sibling call is one where we're under the usual C ABI and not planning
> - // to change that but can still do a tail call:
> - if (!TailCallOpt && IsTailCall)
> - IsSibCall = true;
> - }
> -
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
> -
> - // On AArch64 (and all other architectures I'm aware of) the most this has to
> - // do is adjust the stack pointer.
> - unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
> - if (IsSibCall) {
> - // Since we're not changing the ABI to make this a tail call, the memory
> - // operands are already available in the caller's incoming argument space.
> - NumBytes = 0;
> - }
> -
> - // FPDiff is the byte offset of the call's argument area from the callee's.
> - // Stores to callee stack arguments will be placed in FixedStackSlots offset
> - // by this amount for a tail call. In a sibling call it must be 0 because the
> - // caller will deallocate the entire stack and the callee still expects its
> - // arguments to begin at SP+0. Completely unused for non-tail calls.
> - int FPDiff = 0;
> -
> - if (IsTailCall && !IsSibCall) {
> - unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
> -
> - // FPDiff will be negative if this tail call requires more space than we
> - // would automatically have in our incoming argument space. Positive if we
> - // can actually shrink the stack.
> - FPDiff = NumReusableBytes - NumBytes;
> -
> - // The stack pointer must be 16-byte aligned at all times it's used for a
> - // memory operation, which in practice means at *all* times and in
> - // particular across call boundaries. Therefore our own arguments started at
> - // a 16-byte aligned SP and the delta applied for the tail call should
> - // satisfy the same constraint.
> - assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
> - }
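
A worked example with made-up numbers: if our own incoming argument
area is 32 bytes and this tail call needs 48 bytes of outgoing
arguments, then FPDiff = 32 - 48 = -16, so the callee's arguments start
16 bytes below our own and the stack must grow by that amount before
the branch; a positive FPDiff would instead let it shrink.
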
> -
> - if (!IsSibCall)
> - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
> - dl);
> -
> - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
> - getPointerTy());
> -
> - SmallVector<SDValue, 8> MemOpChains;
> - SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
> -
> - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
> - CCValAssign &VA = ArgLocs[i];
> - ISD::ArgFlagsTy Flags = Outs[i].Flags;
> - SDValue Arg = OutVals[i];
> -
> - // Callee does the actual widening, so all extensions just use an implicit
> - // definition of the rest of the Loc. Aesthetically, this would be nicer as
> - // an ANY_EXTEND, but that isn't valid for floating-point types and this
> - // alternative works on integer types too.
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info!");
> - case CCValAssign::Full: break;
> - case CCValAssign::SExt:
> - case CCValAssign::ZExt:
> - case CCValAssign::AExt:
> - case CCValAssign::FPExt: {
> - unsigned SrcSize = VA.getValVT().getSizeInBits();
> - unsigned SrcSubReg;
> -
> - switch (SrcSize) {
> - case 8: SrcSubReg = AArch64::sub_8; break;
> - case 16: SrcSubReg = AArch64::sub_16; break;
> - case 32: SrcSubReg = AArch64::sub_32; break;
> - case 64: SrcSubReg = AArch64::sub_64; break;
> - default: llvm_unreachable("Unexpected argument promotion");
> - }
> -
> - Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
> - VA.getLocVT(),
> - DAG.getUNDEF(VA.getLocVT()),
> - Arg,
> - DAG.getTargetConstant(SrcSubReg, MVT::i32)),
> - 0);
> -
> - break;
> - }
> - case CCValAssign::BCvt:
> - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
> - break;
> - }
> -
> - if (VA.isRegLoc()) {
> - // A normal register (sub-) argument. For now we just note it down because
> - // we want to copy things into registers as late as possible to avoid
> - // register-pressure (and possibly worse).
> - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
> - continue;
> - }
> -
> - assert(VA.isMemLoc() && "unexpected argument location");
> -
> - SDValue DstAddr;
> - MachinePointerInfo DstInfo;
> - if (IsTailCall) {
> - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
> - VA.getLocVT().getSizeInBits();
> - OpSize = (OpSize + 7) / 8;
> - int32_t Offset = VA.getLocMemOffset() + FPDiff;
> - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
> -
> - DstAddr = DAG.getFrameIndex(FI, getPointerTy());
> - DstInfo = MachinePointerInfo::getFixedStack(FI);
> -
> - // Make sure any stack arguments overlapping with where we're storing are
> - // loaded before this eventual operation. Otherwise they'll be clobbered.
> - Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
> - } else {
> - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize()*8 :
> - VA.getLocVT().getSizeInBits();
> - OpSize = (OpSize + 7) / 8;
> - uint32_t BEAlign = 0;
> - if (OpSize < 8 && !getSubtarget()->isLittle())
> - BEAlign = 8 - OpSize;
> - SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + BEAlign);
> -
> - DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
> - DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
> - }
> -
> - if (Flags.isByVal()) {
> - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
> - SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
> - Flags.getByValAlign(),
> - /*isVolatile = */ false,
> - /*alwaysInline = */ false,
> - DstInfo, MachinePointerInfo());
> - MemOpChains.push_back(Cpy);
> - } else {
> - // Normal stack argument, put it where it's needed.
> - SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
> - false, false, 0);
> - MemOpChains.push_back(Store);
> - }
> - }
> -
> - // The loads and stores generated above shouldn't clash with each
> - // other. Combining them with this TokenFactor notes that fact for the rest of
> - // the backend.
> - if (!MemOpChains.empty())
> - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
> -
> - // Most of the rest of the instructions need to be glued together; we don't
> - // want assignments to actual registers used by a call to be rearranged by a
> - // well-meaning scheduler.
> - SDValue InFlag;
> -
> - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
> - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
> - RegsToPass[i].second, InFlag);
> - InFlag = Chain.getValue(1);
> - }
> -
> - // The linker is responsible for inserting veneers when necessary to put a
> - // function call destination in range, so we don't need to bother with a
> - // wrapper here.
> - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
> - const GlobalValue *GV = G->getGlobal();
> - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
> - } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
> - const char *Sym = S->getSymbol();
> - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
> - }
> -
> - // We don't usually want to end the call-sequence here because we would tidy
> - // the frame up *after* the call, however in the ABI-changing tail-call case
> - // we've carefully laid out the parameters so that when sp is reset they'll be
> - // in the correct location.
> - if (IsTailCall && !IsSibCall) {
> - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
> - DAG.getIntPtrConstant(0, true), InFlag, dl);
> - InFlag = Chain.getValue(1);
> - }
> -
> - // We produce the following DAG scheme for the actual call instruction:
> - // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
> - //
> - // Most arguments aren't going to be used and just keep the values live as
> - // far as LLVM is concerned. It's expected to be selected as simply "bl
> - // callee" (for a direct, non-tail call).
> - std::vector<SDValue> Ops;
> - Ops.push_back(Chain);
> - Ops.push_back(Callee);
> -
> - if (IsTailCall) {
> - // Each tail call may have to adjust the stack by a different amount, so
> - // this information must travel along with the operation for eventual
> - // consumption by emitEpilogue.
> - Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
> - }
> -
> - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
> - Ops.push_back(DAG.getRegister(RegsToPass[i].first,
> - RegsToPass[i].second.getValueType()));
> -
> - // Add a register mask operand representing the call-preserved registers. This
> - // is used later in codegen to constrain register-allocation.
> - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
> - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
> - assert(Mask && "Missing call preserved mask for calling convention");
> - Ops.push_back(DAG.getRegisterMask(Mask));
> -
> - // If we needed glue, put it in as the last argument.
> - if (InFlag.getNode())
> - Ops.push_back(InFlag);
> -
> - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
> -
> - if (IsTailCall) {
> - return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, Ops);
> - }
> -
> - Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, Ops);
> - InFlag = Chain.getValue(1);
> -
> - // Now we can reclaim the stack; we may as well do it before working out
> - // where our return value is.
> - if (!IsSibCall) {
> - uint64_t CalleePopBytes
> - = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
> -
> - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
> - DAG.getIntPtrConstant(CalleePopBytes, true),
> - InFlag, dl);
> - InFlag = Chain.getValue(1);
> - }
> -
> - return LowerCallResult(Chain, InFlag, CallConv,
> - IsVarArg, Ins, dl, DAG, InVals);
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
> - CallingConv::ID CallConv, bool IsVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const {
> - // Assign locations to each value returned by this call.
> - SmallVector<CCValAssign, 16> RVLocs;
> - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs, *DAG.getContext());
> - CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
> -
> - for (unsigned i = 0; i != RVLocs.size(); ++i) {
> - CCValAssign VA = RVLocs[i];
> -
> - // Return values that are too big to fit into registers should use an sret
> - // pointer, so this can be a lot simpler than the main argument code.
> - assert(VA.isRegLoc() && "Memory locations not expected for call return");
> -
> - SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
> - InFlag);
> - Chain = Val.getValue(1);
> - InFlag = Val.getValue(2);
> -
> - switch (VA.getLocInfo()) {
> - default: llvm_unreachable("Unknown loc info!");
> - case CCValAssign::Full: break;
> - case CCValAssign::BCvt:
> - Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
> - break;
> - case CCValAssign::ZExt:
> - case CCValAssign::SExt:
> - case CCValAssign::AExt:
> - // Floating-point arguments only get extended/truncated if they're going
> - // in memory, so using the integer operation is acceptable here.
> - Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
> - break;
> - }
> -
> - InVals.push_back(Val);
> - }
> -
> - return Chain;
> -}
> -
> -bool
> -AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
> - CallingConv::ID CalleeCC,
> - bool IsVarArg,
> - bool IsCalleeStructRet,
> - bool IsCallerStructRet,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SelectionDAG& DAG) const {
> -
> - // For CallingConv::C this function knows whether the ABI needs
> - // changing. That's not true for other conventions so they will have to opt in
> - // manually.
> - if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
> - return false;
> -
> - const MachineFunction &MF = DAG.getMachineFunction();
> - const Function *CallerF = MF.getFunction();
> - CallingConv::ID CallerCC = CallerF->getCallingConv();
> - bool CCMatch = CallerCC == CalleeCC;
> -
> - // Byval parameters hand the function a pointer directly into the stack area
> - // we want to reuse during a tail call. Working around this *is* possible (see
> - // X86) but less efficient and uglier in LowerCall.
> - for (Function::const_arg_iterator i = CallerF->arg_begin(),
> - e = CallerF->arg_end(); i != e; ++i)
> - if (i->hasByValAttr())
> - return false;
> -
> - if (getTargetMachine().Options.GuaranteedTailCallOpt) {
> - if (IsTailCallConvention(CalleeCC) && CCMatch)
> - return true;
> - return false;
> - }
> -
> - // Now we search for cases where we can use a tail call without changing the
> - // ABI. Sibcall is used in some places (particularly gcc) to refer to this
> - // concept.
> -
> - // I want anyone implementing a new calling convention to think long and hard
> - // about this assert.
> - assert((!IsVarArg || CalleeCC == CallingConv::C)
> - && "Unexpected variadic calling convention");
> -
> - if (IsVarArg && !Outs.empty()) {
> - // At least two cases here: if caller is fastcc then we can't have any
> - // memory arguments (we'd be expected to clean up the stack afterwards). If
> - // caller is C then we could potentially use its argument area.
> -
> - // FIXME: for now we take the most conservative of these in both cases:
> - // disallow all variadic memory operands.
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> -
> - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
> - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
> - if (!ArgLocs[i].isRegLoc())
> - return false;
> - }
> -
> - // If the calling conventions do not match, then we'd better make sure the
> - // results are returned in the same way as what the caller expects.
> - if (!CCMatch) {
> - SmallVector<CCValAssign, 16> RVLocs1;
> - CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs1, *DAG.getContext());
> - CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
> -
> - SmallVector<CCValAssign, 16> RVLocs2;
> - CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
> - getTargetMachine(), RVLocs2, *DAG.getContext());
> - CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
> -
> - if (RVLocs1.size() != RVLocs2.size())
> - return false;
> - for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
> - if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
> - return false;
> - if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
> - return false;
> - if (RVLocs1[i].isRegLoc()) {
> - if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
> - return false;
> - } else {
> - if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
> - return false;
> - }
> - }
> - }
> -
> - // Nothing more to check if the callee is taking no arguments
> - if (Outs.empty())
> - return true;
> -
> - SmallVector<CCValAssign, 16> ArgLocs;
> - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
> - getTargetMachine(), ArgLocs, *DAG.getContext());
> -
> - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
> -
> - const AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> -
> - // If the stack arguments for this call would fit into our own save area then
> - // the call can be made tail.
> - return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
> -}
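
A minimal case (hypothetical functions) that passes all of the checks
above under CallingConv::C, since no argument needs the stack and the
optimiser can mark the call as tail:

    long callee(long);
    long caller(long x) {
      return callee(x + 1);   // can be emitted as "b callee"
    }
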
> -
> -bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
> - bool TailCallOpt) const {
> - return CallCC == CallingConv::Fast && TailCallOpt;
> -}
> -
> -bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
> - return CallCC == CallingConv::Fast;
> -}
> -
> -SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
> - SelectionDAG &DAG,
> - MachineFrameInfo *MFI,
> - int ClobberedFI) const {
> - SmallVector<SDValue, 8> ArgChains;
> - int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
> - int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
> -
> - // Include the original chain at the beginning of the list. When this is
> - // used by target LowerCall hooks, this helps legalize find the
> - // CALLSEQ_BEGIN node.
> - ArgChains.push_back(Chain);
> -
> - // Add a chain value for each stack-argument load that overlaps the object
> - // being clobbered.
> - for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
> - UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
> - if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
> - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
> - if (FI->getIndex() < 0) {
> - int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
> - int64_t InLastByte = InFirstByte;
> - InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
> -
> - if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
> - (FirstByte <= InFirstByte && InFirstByte <= LastByte))
> - ArgChains.push_back(SDValue(L, 1));
> - }
> -
> - // Build a tokenfactor for all the chains.
> - return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
> -}
> -
> -static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
> - switch (CC) {
> - case ISD::SETEQ: return A64CC::EQ;
> - case ISD::SETGT: return A64CC::GT;
> - case ISD::SETGE: return A64CC::GE;
> - case ISD::SETLT: return A64CC::LT;
> - case ISD::SETLE: return A64CC::LE;
> - case ISD::SETNE: return A64CC::NE;
> - case ISD::SETUGT: return A64CC::HI;
> - case ISD::SETUGE: return A64CC::HS;
> - case ISD::SETULT: return A64CC::LO;
> - case ISD::SETULE: return A64CC::LS;
> - default: llvm_unreachable("Unexpected condition code");
> - }
> -}
> -
> -bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
> - // icmp is implemented using adds/subs immediate, which take an unsigned
> - // 12-bit immediate, optionally shifted left by 12 bits.
> -
> - // The range is symmetric, since we can negate and use SUBS instead of ADDS.
> - if (Val < 0)
> - Val = -Val;
> -
> - return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
> -}
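
A few illustrative values for this predicate:

    isLegalICmpImmediate(4095);   // true:  0xfff fits the 12-bit field
    isLegalICmpImmediate(4096);   // true:  0x1000 encodes with LSL #12
    isLegalICmpImmediate(4097);   // false: 0x1001 straddles both halves
    isLegalICmpImmediate(-42);    // true:  negated and handled with ADDS
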
> -
> -SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
> - ISD::CondCode CC, SDValue &A64cc,
> - SelectionDAG &DAG, SDLoc &dl) const {
> - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
> - int64_t C = 0;
> - EVT VT = RHSC->getValueType(0);
> - bool knownInvalid = false;
> -
> - // I'm not convinced the rest of LLVM handles these edge cases properly, but
> - // we can at least get it right.
> - if (isSignedIntSetCC(CC)) {
> - C = RHSC->getSExtValue();
> - } else if (RHSC->getZExtValue() > INT64_MAX) {
> - // A 64-bit constant not representable by a signed 64-bit integer is far
> - // too big to fit into a SUBS immediate anyway.
> - knownInvalid = true;
> - } else {
> - C = RHSC->getZExtValue();
> - }
> -
> - if (!knownInvalid && !isLegalICmpImmediate(C)) {
> - // Constant does not fit, try adjusting it by one?
> - switch (CC) {
> - default: break;
> - case ISD::SETLT:
> - case ISD::SETGE:
> - if (isLegalICmpImmediate(C-1)) {
> - CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
> - RHS = DAG.getConstant(C-1, VT);
> - }
> - break;
> - case ISD::SETULT:
> - case ISD::SETUGE:
> - if (isLegalICmpImmediate(C-1)) {
> - CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
> - RHS = DAG.getConstant(C-1, VT);
> - }
> - break;
> - case ISD::SETLE:
> - case ISD::SETGT:
> - if (isLegalICmpImmediate(C+1)) {
> - CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
> - RHS = DAG.getConstant(C+1, VT);
> - }
> - break;
> - case ISD::SETULE:
> - case ISD::SETUGT:
> - if (isLegalICmpImmediate(C+1)) {
> - CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
> - RHS = DAG.getConstant(C+1, VT);
> - }
> - break;
> - }
> - }
> - }
> -
> - A64CC::CondCodes CondCode = IntCCToA64CC(CC);
> - A64cc = DAG.getConstant(CondCode, MVT::i32);
> - return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> -}
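
For instance, with an i64 "x < 4097" the RHS doesn't encode, but 4096
does, so the SETLT case above rewrites the test to the equivalent
"x <= 4096" and the comparison stays a single subtract-immediate.
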
> -
> -static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
> - A64CC::CondCodes &Alternative) {
> - A64CC::CondCodes CondCode = A64CC::Invalid;
> - Alternative = A64CC::Invalid;
> -
> - switch (CC) {
> - default: llvm_unreachable("Unknown FP condition!");
> - case ISD::SETEQ:
> - case ISD::SETOEQ: CondCode = A64CC::EQ; break;
> - case ISD::SETGT:
> - case ISD::SETOGT: CondCode = A64CC::GT; break;
> - case ISD::SETGE:
> - case ISD::SETOGE: CondCode = A64CC::GE; break;
> - case ISD::SETOLT: CondCode = A64CC::MI; break;
> - case ISD::SETOLE: CondCode = A64CC::LS; break;
> - case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
> - case ISD::SETO: CondCode = A64CC::VC; break;
> - case ISD::SETUO: CondCode = A64CC::VS; break;
> - case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
> - case ISD::SETUGT: CondCode = A64CC::HI; break;
> - case ISD::SETUGE: CondCode = A64CC::PL; break;
> - case ISD::SETLT:
> - case ISD::SETULT: CondCode = A64CC::LT; break;
> - case ISD::SETLE:
> - case ISD::SETULE: CondCode = A64CC::LE; break;
> - case ISD::SETNE:
> - case ISD::SETUNE: CondCode = A64CC::NE; break;
> - }
> - return CondCode;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc DL(Op);
> - EVT PtrVT = getPointerTy();
> - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
> -
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - // The most efficient code is PC-relative anyway for the small memory model,
> - // so we don't need to worry about relocation model.
> - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - DAG.getTargetBlockAddress(BA, PtrVT, 0,
> - AArch64II::MO_NO_FLAG),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0,
> - AArch64II::MO_LO12),
> - DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
> - case CodeModel::Large:
> - return DAG.getNode(
> - AArch64ISD::WrapperLarge, DL, PtrVT,
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -// (BRCOND chain, val, dest)
> -SDValue
> -AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue Chain = Op.getOperand(0);
> - SDValue TheBit = Op.getOperand(1);
> - SDValue DestBB = Op.getOperand(2);
> -
> - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
> - // that as the consumer we are responsible for ignoring rubbish in higher
> - // bits.
> - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
> - DAG.getConstant(1, MVT::i32));
> -
> - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
> - DAG.getConstant(0, TheBit.getValueType()),
> - DAG.getCondCode(ISD::SETNE));
> -
> - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
> - A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
> - DestBB);
> -}
> -
> -// (BR_CC chain, condcode, lhs, rhs, dest)
> -SDValue
> -AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue Chain = Op.getOperand(0);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
> - SDValue LHS = Op.getOperand(2);
> - SDValue RHS = Op.getOperand(3);
> - SDValue DestBB = Op.getOperand(4);
> -
> - if (LHS.getValueType() == MVT::f128) {
> - // f128 comparisons are lowered to runtime calls by a routine which sets
> - // LHS, RHS and CC appropriately for the rest of this function to continue.
> - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
> -
> - // If softenSetCCOperands returned a scalar, we need to compare the result
> - // against zero to select between true and false values.
> - if (!RHS.getNode()) {
> - RHS = DAG.getConstant(0, LHS.getValueType());
> - CC = ISD::SETNE;
> - }
> - }
> -
> - if (LHS.getValueType().isInteger()) {
> - SDValue A64cc;
> -
> - // Integers are handled in a separate function because the combinations of
> - // immediates and tests can get hairy and we may want to fiddle things.
> - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
> -
> - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
> - Chain, CmpOp, A64cc, DestBB);
> - }
> -
> - // Note that some LLVM floating-point CondCodes can't be lowered to a single
> - // conditional branch, hence FPCCToA64CC can set a second test, where either
> - // passing is sufficient.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
> - Chain, SetCC, A64cc, DestBB);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
> - A64BR_CC, SetCC, A64cc, DestBB);
> -
> - }
> -
> - return A64BR_CC;
> -}
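
For a condition that needs the Alternative, e.g. SETONE (ordered and
not equal), the result is two conditional branches off one compare,
roughly:

    fcmp  d0, d1
    b.mi  dest        // taken when LHS < RHS
    b.gt  dest        // taken when LHS > RHS
                      // falls through when equal or unordered
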
> -
> -SDValue
> -AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
> - RTLIB::Libcall Call) const {
> - ArgListTy Args;
> - ArgListEntry Entry;
> - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
> - EVT ArgVT = Op.getOperand(i).getValueType();
> - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
> - Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
> - Entry.isSExt = false;
> - Entry.isZExt = false;
> - Args.push_back(Entry);
> - }
> - SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
> -
> - Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
> -
> - // By default, the input chain to this libcall is the entry node of the
> - // function. If the libcall is going to be emitted as a tail call then
> - // isUsedByReturnOnly will change it to the right chain if the return
> - // node which is being folded has a non-entry input chain.
> - SDValue InChain = DAG.getEntryNode();
> -
> - // isTailCall may be true since the callee does not reference caller stack
> - // frame. Check if it's in the right position.
> - SDValue TCChain = InChain;
> - bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
> - if (isTailCall)
> - InChain = TCChain;
> -
> - TargetLowering::CallLoweringInfo CLI(DAG);
> - CLI.setDebugLoc(SDLoc(Op)).setChain(InChain)
> - .setCallee(getLibcallCallingConv(Call), RetTy, Callee, &Args, 0)
> - .setTailCall(isTailCall);
> -
> - std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
> -
> - if (!CallInfo.second.getNode())
> - // It's a tailcall, return the chain (which is the DAG root).
> - return DAG.getRoot();
> -
> - return CallInfo.first;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
> - if (Op.getOperand(0).getValueType() != MVT::f128) {
> - // It's legal except when f128 is involved
> - return Op;
> - }
> -
> - RTLIB::Libcall LC;
> - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - SDValue SrcVal = Op.getOperand(0);
> - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
> - /*isSigned*/ false, SDLoc(Op)).first;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
> - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
> -
> - RTLIB::Libcall LC;
> - LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - return LowerF128ToCall(Op, DAG, LC);
> -}
> -
> -static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) {
> - SDLoc dl(Op);
> - EVT VT = Op.getValueType();
> - SDValue Vec = Op.getOperand(0);
> - EVT OpVT = Vec.getValueType();
> - unsigned Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
> -
> - if (VT.getVectorNumElements() == 1) {
> - assert(OpVT == MVT::v1f64 && "Unexpected vector type!");
> - if (VT.getSizeInBits() == OpVT.getSizeInBits())
> - return Op;
> - return DAG.UnrollVectorOp(Op.getNode());
> - }
> -
> - if (VT.getSizeInBits() > OpVT.getSizeInBits()) {
> - assert(Vec.getValueType() == MVT::v2f32 && VT == MVT::v2i64 &&
> - "Unexpected vector type!");
> - Vec = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Vec);
> - return DAG.getNode(Opc, dl, VT, Vec);
> - } else if (VT.getSizeInBits() < OpVT.getSizeInBits()) {
> - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
> - OpVT.getVectorElementType().getSizeInBits());
> - CastVT =
> - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
> - Vec = DAG.getNode(Opc, dl, CastVT, Vec);
> - return DAG.getNode(ISD::TRUNCATE, dl, VT, Vec);
> - }
> - return DAG.getNode(Opc, dl, VT, Vec);
> -}
> -
> -static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
> - // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
> - // same operand and of type v1* using the DUP instruction.
> - unsigned NumOps = Op->getNumOperands();
> - if (NumOps == 2) {
> - assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
> - return Op;
> - }
> -
> - if (NumOps != 4 && NumOps != 8 && NumOps != 16)
> - return SDValue();
> -
> - // Must be a single value for VDUP.
> - SDValue Op0 = Op.getOperand(0);
> - for (unsigned i = 1; i < NumOps; ++i) {
> - SDValue OpN = Op.getOperand(i);
> - if (Op0 != OpN)
> - return SDValue();
> - }
> -
> - // Verify the value type.
> - EVT EltVT = Op0.getValueType();
> - switch (NumOps) {
> - default: llvm_unreachable("Unexpected number of operands");
> - case 4:
> - if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
> - return SDValue();
> - break;
> - case 8:
> - if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
> - return SDValue();
> - break;
> - case 16:
> - if (EltVT != MVT::v1i8)
> - return SDValue();
> - break;
> - }
> -
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> - // VDUP produces better code for constants.
> - if (Op0->getOpcode() == ISD::BUILD_VECTOR)
> - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
> - DAG.getConstant(0, MVT::i64));
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) const {
> - if (Op.getValueType().isVector())
> - return LowerVectorFP_TO_INT(Op, DAG, IsSigned);
> - if (Op.getOperand(0).getValueType() != MVT::f128) {
> - // It's legal except when f128 is involved
> - return Op;
> - }
> -
> - RTLIB::Libcall LC;
> - if (IsSigned)
> - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
> - else
> - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - return LowerF128ToCall(Op, DAG, LC);
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
> - MachineFunction &MF = DAG.getMachineFunction();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> - MFI->setReturnAddressIsTaken(true);
> -
> - if (verifyReturnAddressArgumentIsConstant(Op, DAG))
> - return SDValue();
> -
> - EVT VT = Op.getValueType();
> - SDLoc dl(Op);
> - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> - if (Depth) {
> - SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
> - SDValue Offset = DAG.getConstant(8, MVT::i64);
> - return DAG.getLoad(VT, dl, DAG.getEntryNode(),
> - DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
> - MachinePointerInfo(), false, false, false, 0);
> - }
> -
> - // Return X30, which contains the return address. Mark it an implicit live-in.
> - unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64));
> - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64);
> -}
> -
> -SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG)
> - const {
> - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
> - MFI->setFrameAddressIsTaken(true);
> -
> - EVT VT = Op.getValueType();
> - SDLoc dl(Op);
> - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> - unsigned FrameReg = AArch64::X29;
> - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
> - while (Depth--)
> - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
> - MachinePointerInfo(),
> - false, false, false, 0);
> - return FrameAddr;
> -}
> -
> -// FIXME? Maybe this could be a TableGen attribute on some registers and
> -// this table could be generated automatically from RegInfo.
> -unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
> - EVT VT) const {
> - unsigned Reg = StringSwitch<unsigned>(RegName)
> - .Case("sp", AArch64::XSP)
> - .Default(0);
> - if (Reg)
> - return Reg;
> - report_fatal_error("Invalid register name for named-register global variable");
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(getTargetMachine().getCodeModel() == CodeModel::Large);
> - assert(getTargetMachine().getRelocationModel() == Reloc::Static);
> -
> - EVT PtrVT = getPointerTy();
> - SDLoc dl(Op);
> - const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
> - const GlobalValue *GV = GN->getGlobal();
> -
> - SDValue GlobalAddr = DAG.getNode(
> - AArch64ISD::WrapperLarge, dl, PtrVT,
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
> -
> - if (GN->getOffset() != 0)
> - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
> - DAG.getConstant(GN->getOffset(), PtrVT));
> -
> - return GlobalAddr;
> -}
> -
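The four MO_ABS_G3..G0_NC operands correspond to the usual large-model MOVZ/MOVK expansion, which rebuilds a 64-bit absolute address 16 bits at a time. A sketch of the arithmetic the relocations encode, assuming that expansion:

    #include <cstdint>

    uint64_t materializeLargeModel(uint16_t G3, uint16_t G2, uint16_t G1,
                                   uint16_t G0) {
      uint64_t Addr = (uint64_t)G3 << 48; // MOVZ xN, #:abs_g3:sym
      Addr |= (uint64_t)G2 << 32;         // MOVK xN, #:abs_g2_nc:sym
      Addr |= (uint64_t)G1 << 16;         // MOVK xN, #:abs_g1_nc:sym
      Addr |= (uint64_t)G0;               // MOVK xN, #:abs_g0_nc:sym
      return Addr;
    }
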
> -SDValue
> -AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(getTargetMachine().getCodeModel() == CodeModel::Small);
> -
> - EVT PtrVT = getPointerTy();
> - SDLoc dl(Op);
> - const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
> - const GlobalValue *GV = GN->getGlobal();
> - unsigned Alignment = GV->getAlignment();
> - Reloc::Model RelocM = getTargetMachine().getRelocationModel();
> - if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
> - // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
> - // to zero when they remain undefined. In PIC mode the GOT can take care of
> - // this, but in absolute mode we use a constant pool load.
> - SDValue PoolAddr;
> - PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
> - DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
> - AArch64II::MO_NO_FLAG),
> - DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
> - AArch64II::MO_LO12),
> - DAG.getConstant(8, MVT::i32));
> - SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
> - MachinePointerInfo::getConstantPool(),
> - /*isVolatile=*/ false,
> - /*isNonTemporal=*/ true,
> - /*isInvariant=*/ true, 8);
> - if (GN->getOffset() != 0)
> - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
> - DAG.getConstant(GN->getOffset(), PtrVT));
> -
> - return GlobalAddr;
> - }
> -
> - if (Alignment == 0) {
> - const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
> - if (GVPtrTy->getElementType()->isSized()) {
> - Alignment
> - = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
> - } else {
> - // Be conservative if we can't guess, not that it really matters:
> - // functions and labels aren't valid for loads, and the methods used to
> - // actually calculate an address work with any alignment.
> - Alignment = 1;
> - }
> - }
> -
> - unsigned char HiFixup, LoFixup;
> - bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
> -
> - if (UseGOT) {
> - HiFixup = AArch64II::MO_GOT;
> - LoFixup = AArch64II::MO_GOT_LO12;
> - Alignment = 8;
> - } else {
> - HiFixup = AArch64II::MO_NO_FLAG;
> - LoFixup = AArch64II::MO_LO12;
> - }
> -
> - // AArch64's small model demands the following sequence:
> - // ADRP x0, somewhere
> - // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
> - SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
> - HiFixup),
> - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
> - LoFixup),
> - DAG.getConstant(Alignment, MVT::i32));
> -
> - if (UseGOT) {
> - GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
> - GlobalRef);
> - }
> -
> - if (GN->getOffset() != 0)
> - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
> - DAG.getConstant(GN->getOffset(), PtrVT));
> -
> - return GlobalRef;
> -}
> -
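The ADRP/ADD sequence mentioned in the comment above splits an address into a 4KiB page and a 12-bit offset within it. A sketch of the split; the PC-relative part of ADRP is resolved by the relocation:

    #include <cstdint>

    uint64_t smallModelAddress(uint64_t Sym) {
      uint64_t Page = Sym & ~0xfffULL; // what ADRP materializes
      uint64_t Lo12 = Sym & 0xfffULL;  // what the :lo12: ADD (or LDR) supplies
      return Page + Lo12;
    }
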
> -SDValue
> -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
> - SelectionDAG &DAG) const {
> - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
> - // we make those distinctions here.
> -
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - return LowerGlobalAddressELFSmall(Op, DAG);
> - case CodeModel::Large:
> - return LowerGlobalAddressELFLarge(Op, DAG);
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerConstantPool(SDValue Op,
> - SelectionDAG &DAG) const {
> - SDLoc DL(Op);
> - EVT PtrVT = getPointerTy();
> - ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Op);
> - const Constant *C = CN->getConstVal();
> -
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - // The most efficient code is PC-relative anyway for the small memory model,
> - // so we don't need to worry about relocation model.
> - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0,
> - AArch64II::MO_NO_FLAG),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0,
> - AArch64II::MO_LO12),
> - DAG.getConstant(CN->getAlignment(), MVT::i32));
> - case CodeModel::Large:
> - return DAG.getNode(
> - AArch64ISD::WrapperLarge, DL, PtrVT,
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC));
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
> - SDValue DescAddr,
> - SDLoc DL,
> - SelectionDAG &DAG) const {
> - EVT PtrVT = getPointerTy();
> -
> - // The function we need to call is simply the first entry in the GOT for this
> - // descriptor, load it in preparation.
> - SDValue Func, Chain;
> - Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
> - DescAddr);
> -
> - // The function takes only one argument: the address of the descriptor itself
> - // in X0.
> - SDValue Glue;
> - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
> - Glue = Chain.getValue(1);
> -
> - // Finally, there's a special calling convention which means that the lookup
> - // must preserve all registers (except X0, obviously).
> - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
> - const AArch64RegisterInfo *A64RI
> - = static_cast<const AArch64RegisterInfo *>(TRI);
> - const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
> -
> - // We're now ready to populate the argument list, as with a normal call:
> - std::vector<SDValue> Ops;
> - Ops.push_back(Chain);
> - Ops.push_back(Func);
> - Ops.push_back(SymAddr);
> - Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
> - Ops.push_back(DAG.getRegisterMask(Mask));
> - Ops.push_back(Glue);
> -
> - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
> - Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, Ops);
> - Glue = Chain.getValue(1);
> -
> - // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
> - // back to the generic handling code.
> - return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
> -}
> -
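A rough C++ picture of the convention this code implements, with hypothetical field names: the descriptor's first GOT entry is a resolver function, the call passes the descriptor's own address in X0, and the result is the symbol's offset from TPIDR_EL0.

    #include <cstdint>

    struct TLSDescriptor {
      uint64_t (*Resolver)(const TLSDescriptor *); // first GOT entry; the
                                                   // GOTLoad above fetches it
      uint64_t Arg;                                // private to the linker
    };

    // The TLSDESCCALL node amounts to this call, with the extra constraint
    // that the resolver preserves every register except X0.
    uint64_t tpOffset(const TLSDescriptor *Desc) {
      return Desc->Resolver(Desc);
    }
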
> -SDValue
> -AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(getSubtarget()->isTargetELF() &&
> - "TLS not implemented for non-ELF targets");
> - assert(getTargetMachine().getCodeModel() == CodeModel::Small
> - && "TLS only supported in small memory model");
> - const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
> -
> - TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
> -
> - SDValue TPOff;
> - EVT PtrVT = getPointerTy();
> - SDLoc DL(Op);
> - const GlobalValue *GV = GA->getGlobal();
> -
> - SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
> -
> - if (Model == TLSModel::InitialExec) {
> - TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_GOTTPREL),
> - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_GOTTPREL_LO12),
> - DAG.getConstant(8, MVT::i32));
> - TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
> - TPOff);
> - } else if (Model == TLSModel::LocalExec) {
> - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_TPREL_G1);
> - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_TPREL_G0_NC);
> -
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
> - DAG.getTargetConstant(1, MVT::i32)), 0);
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
> - TPOff, LoVar,
> - DAG.getTargetConstant(0, MVT::i32)), 0);
> - } else if (Model == TLSModel::GeneralDynamic) {
> - // Accesses used in this sequence go via the TLS descriptor which lives in
> - // the GOT. Prepare an address we can use to handle this.
> - SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_TLSDESC);
> - SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
> - AArch64II::MO_TLSDESC_LO12);
> - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - HiDesc, LoDesc,
> - DAG.getConstant(8, MVT::i32));
> - SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
> -
> - TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
> - } else if (Model == TLSModel::LocalDynamic) {
> - // Local-dynamic accesses proceed in two phases. First, a general-dynamic
> - // TLS descriptor call against the special symbol _TLS_MODULE_BASE_
> - // calculates the beginning of the module's TLS region; a DTPREL offset
> - // calculation then locates the variable within that region.
> -
> - // These accesses will need deduplicating if there's more than one.
> - AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
> - .getInfo<AArch64MachineFunctionInfo>();
> - MFI->incNumLocalDynamicTLSAccesses();
> -
> - // Get the location of _TLS_MODULE_BASE_:
> - SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
> - AArch64II::MO_TLSDESC);
> - SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
> - AArch64II::MO_TLSDESC_LO12);
> - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
> - HiDesc, LoDesc,
> - DAG.getConstant(8, MVT::i32));
> - SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
> -
> - ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
> -
> - // Get the variable's offset from _TLS_MODULE_BASE_
> - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_DTPREL_G1);
> - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
> - AArch64II::MO_DTPREL_G0_NC);
> -
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
> - DAG.getTargetConstant(0, MVT::i32)), 0);
> - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
> - TPOff, LoVar,
> - DAG.getTargetConstant(0, MVT::i32)), 0);
> - } else
> - llvm_unreachable("Unsupported TLS access model");
> -
> - return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
> -}
> -
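In the local-exec path above, the MOVZxii/MOVKxii pair just materializes a link-time-constant thread-pointer offset 16 bits at a time; the final ISD::ADD applies whichever offset was computed to the thread base. A sketch of the local-exec arithmetic:

    #include <cstdint>

    // MOVZ #:tprel_g1: (shifted left 16) then MOVK #:tprel_g0_nc:,
    // added to the thread pointer read from TPIDR_EL0.
    uint64_t localExecAddress(uint64_t ThreadBase, uint16_t G1, uint16_t G0) {
      uint64_t TPOff = ((uint64_t)G1 << 16) | G0;
      return ThreadBase + TPOff;
    }
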
> -static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) {
> - SDLoc dl(Op);
> - EVT VT = Op.getValueType();
> - SDValue Vec = Op.getOperand(0);
> - unsigned Opc = IsSigned ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
> -
> - if (VT.getVectorNumElements() == 1) {
> - assert(VT == MVT::v1f64 && "Unexpected vector type!");
> - if (VT.getSizeInBits() == Vec.getValueSizeInBits())
> - return Op;
> - return DAG.UnrollVectorOp(Op.getNode());
> - }
> -
> - if (VT.getSizeInBits() < Vec.getValueSizeInBits()) {
> - assert(Vec.getValueType() == MVT::v2i64 && VT == MVT::v2f32 &&
> - "Unexpected vector type!");
> - Vec = DAG.getNode(Opc, dl, MVT::v2f64, Vec);
> - return DAG.getNode(ISD::FP_ROUND, dl, VT, Vec, DAG.getIntPtrConstant(0));
> - } else if (VT.getSizeInBits() > Vec.getValueSizeInBits()) {
> - unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
> - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
> - VT.getVectorElementType().getSizeInBits());
> - CastVT =
> - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
> - Vec = DAG.getNode(CastOpc, dl, CastVT, Vec);
> - }
> -
> - return DAG.getNode(Opc, dl, VT, Vec);
> -}
> -
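The two size-mismatch cases above have simple scalar analogues for the signed flavour: a too-wide source converts at the wider FP type and then rounds, while a too-narrow source is integer-extended first.

    #include <cstdint>

    float narrowConvert(int64_t X) {
      return (float)(double)X;  // convert at f64, then the FP_ROUND
    }
    float widenConvert(int16_t X) {
      return (float)(int32_t)X; // SIGN_EXTEND (or ZERO_EXTEND) first
    }
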
> -SDValue
> -AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
> - bool IsSigned) const {
> - if (Op.getValueType().isVector())
> - return LowerVectorINT_TO_FP(Op, DAG, IsSigned);
> - if (Op.getValueType() != MVT::f128) {
> - // Legal for everything except f128.
> - return Op;
> - }
> -
> - RTLIB::Libcall LC;
> - if (IsSigned)
> - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
> - else
> - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
> -
> - return LowerF128ToCall(Op, DAG, LC);
> -}
> -
> -
> -SDValue
> -AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
> - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
> - SDLoc dl(JT);
> - EVT PtrVT = getPointerTy();
> -
> - // When compiling PIC, jump tables get put in the code section, so a static
> - // relocation style is acceptable in both cases.
> - switch (getTargetMachine().getCodeModel()) {
> - case CodeModel::Small:
> - return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
> - AArch64II::MO_LO12),
> - DAG.getConstant(1, MVT::i32));
> - case CodeModel::Large:
> - return DAG.getNode(
> - AArch64ISD::WrapperLarge, dl, PtrVT,
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
> - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
> - default:
> - llvm_unreachable("Only small and large code models supported now");
> - }
> -}
> -
> -// (SELECT testbit, iftrue, iffalse)
> -SDValue
> -AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue TheBit = Op.getOperand(0);
> - SDValue IfTrue = Op.getOperand(1);
> - SDValue IfFalse = Op.getOperand(2);
> -
> - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
> - // that as the consumer we are responsible for ignoring rubbish in higher
> - // bits.
> - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
> - DAG.getConstant(1, MVT::i32));
> - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
> - DAG.getConstant(0, TheBit.getValueType()),
> - DAG.getCondCode(ISD::SETNE));
> -
> - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
> - A64CMP, IfTrue, IfFalse,
> - DAG.getConstant(A64CC::NE, MVT::i32));
> -}
> -
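Since only bit 0 of the boolean operand is defined, the lowering masks with 1 before comparing. The scalar equivalent of the AND/SETCC/SELECT_CC chain:

    #include <cstdint>

    int selectOnBit0(uint32_t TheBit, int IfTrue, int IfFalse) {
      return ((TheBit & 1) != 0) ? IfTrue : IfFalse; // AND #1, then test NE 0
    }
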
> -static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
> - SDLoc DL(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
> - EVT VT = Op.getValueType();
> - bool Invert = false;
> - SDValue Op0, Op1;
> - unsigned Opcode;
> -
> - if (LHS.getValueType().isInteger()) {
> -
> - // Attempt to use Vector Integer Compare Mask Test instruction.
> - // TST = icmp ne (and (op0, op1), zero).
> - if (CC == ISD::SETNE) {
> - if (((LHS.getOpcode() == ISD::AND) &&
> - ISD::isBuildVectorAllZeros(RHS.getNode())) ||
> - ((RHS.getOpcode() == ISD::AND) &&
> - ISD::isBuildVectorAllZeros(LHS.getNode()))) {
> -
> - SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
> - SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
> - SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
> - return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
> - }
> - }
> -
> - // Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
> - // Note: Compare against Zero does not support unsigned predicates.
> - if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
> - ISD::isBuildVectorAllZeros(LHS.getNode())) &&
> - !isUnsignedIntSetCC(CC)) {
> -
> - // If LHS is the zero value, swap operands and CondCode.
> - if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
> - CC = getSetCCSwappedOperands(CC);
> - Op0 = RHS;
> - } else
> - Op0 = LHS;
> -
> - // Ensure valid CondCode for Compare Mask against Zero instruction:
> - // EQ, GE, GT, LE, LT.
> - if (ISD::SETNE == CC) {
> - Invert = true;
> - CC = ISD::SETEQ;
> - }
> -
> - // Use the constant's type to differentiate integer and FP compares with zero.
> - Op1 = DAG.getConstant(0, MVT::i32);
> - Opcode = AArch64ISD::NEON_CMPZ;
> -
> - } else {
> - // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
> - // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
> - bool Swap = false;
> - switch (CC) {
> - default:
> - llvm_unreachable("Illegal integer comparison.");
> - case ISD::SETEQ:
> - case ISD::SETGT:
> - case ISD::SETGE:
> - case ISD::SETUGT:
> - case ISD::SETUGE:
> - break;
> - case ISD::SETNE:
> - Invert = true;
> - CC = ISD::SETEQ;
> - break;
> - case ISD::SETULT:
> - case ISD::SETULE:
> - case ISD::SETLT:
> - case ISD::SETLE:
> - Swap = true;
> - CC = getSetCCSwappedOperands(CC);
> - }
> -
> - if (Swap)
> - std::swap(LHS, RHS);
> -
> - Opcode = AArch64ISD::NEON_CMP;
> - Op0 = LHS;
> - Op1 = RHS;
> - }
> -
> - // Generate Compare Mask instr or Compare Mask against Zero instr.
> - SDValue NeonCmp =
> - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
> -
> - if (Invert)
> - NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
> -
> - return NeonCmp;
> - }
> -
> - // Now handle Floating Point cases.
> - // Attempt to use Vector Floating Point Compare Mask against Zero instruction.
> - if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
> - ISD::isBuildVectorAllZeros(LHS.getNode())) {
> -
> - // If LHS is the zero value, swap operands and CondCode.
> - if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
> - CC = getSetCCSwappedOperands(CC);
> - Op0 = RHS;
> - } else
> - Op0 = LHS;
> -
> - // Use the constant's type to differentiate integer and FP compares with zero.
> - Op1 = DAG.getConstantFP(0, MVT::f32);
> - Opcode = AArch64ISD::NEON_CMPZ;
> - } else {
> - // Attempt to use Vector Floating Point Compare Mask instruction.
> - Op0 = LHS;
> - Op1 = RHS;
> - Opcode = AArch64ISD::NEON_CMP;
> - }
> -
> - SDValue NeonCmpAlt;
> - // Some register compares have to be implemented with swapped CC and operands,
> - // e.g.: OLT implemented as OGT with swapped operands.
> - bool SwapIfRegArgs = false;
> -
> - // Ensure valid CondCode for FP Compare Mask against Zero instruction:
> - // EQ, GE, GT, LE, LT.
> - // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
> - switch (CC) {
> - default:
> - llvm_unreachable("Illegal FP comparison");
> - case ISD::SETUNE:
> - case ISD::SETNE:
> - Invert = true; // Fallthrough
> - case ISD::SETOEQ:
> - case ISD::SETEQ:
> - CC = ISD::SETEQ;
> - break;
> - case ISD::SETOLT:
> - case ISD::SETLT:
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETOGT:
> - case ISD::SETGT:
> - CC = ISD::SETGT;
> - break;
> - case ISD::SETOLE:
> - case ISD::SETLE:
> - CC = ISD::SETLE;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETOGE:
> - case ISD::SETGE:
> - CC = ISD::SETGE;
> - break;
> - case ISD::SETUGE:
> - Invert = true;
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETULE:
> - Invert = true;
> - CC = ISD::SETGT;
> - break;
> - case ISD::SETUGT:
> - Invert = true;
> - CC = ISD::SETLE;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETULT:
> - Invert = true;
> - CC = ISD::SETGE;
> - break;
> - case ISD::SETUEQ:
> - Invert = true; // Fallthrough
> - case ISD::SETONE:
> - // Expand this to (OGT | OLT).
> - NeonCmpAlt =
> - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - case ISD::SETUO:
> - Invert = true; // Fallthrough
> - case ISD::SETO:
> - // Expand this to (OGE | OLT).
> - NeonCmpAlt =
> - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
> - CC = ISD::SETLT;
> - SwapIfRegArgs = true;
> - break;
> - }
> -
> - if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
> - CC = getSetCCSwappedOperands(CC);
> - std::swap(Op0, Op1);
> - }
> -
> - // Generate FP Compare Mask instr or FP Compare Mask against Zero instr
> - SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
> -
> - if (NeonCmpAlt.getNode())
> - NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
> -
> - if (Invert)
> - NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
> -
> - return NeonCmp;
> -}
> -
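The Invert flags in the FP switch above rely on IEEE comparison semantics: each unordered predicate is exactly the negation of the complementary ordered one, so a NOT after the ordered compare is sound even for NaNs. Scalar sketch:

    // "A < B" is false when either operand is NaN, so its negation is
    // "unordered or A >= B", which is SETUGE. Likewise for SETULE.
    bool uge(float A, float B) { return !(A < B); } // SETUGE == NOT SETOLT
    bool ule(float A, float B) { return !(A > B); } // SETULE == NOT SETOGT
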
> -// (SETCC lhs, rhs, condcode)
> -SDValue
> -AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
> - EVT VT = Op.getValueType();
> -
> - if (VT.isVector())
> - return LowerVectorSETCC(Op, DAG);
> -
> - if (LHS.getValueType() == MVT::f128) {
> - // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
> - // for the rest of the function (some i32 or i64 values).
> - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
> -
> - // If softenSetCCOperands returned a scalar, use it.
> - if (!RHS.getNode()) {
> - assert(LHS.getValueType() == Op.getValueType() &&
> - "Unexpected setcc expansion!");
> - return LHS;
> - }
> - }
> -
> - if (LHS.getValueType().isInteger()) {
> - SDValue A64cc;
> -
> - // Integers are handled in a separate function because the combinations of
> - // immediates and tests can get hairy and we may want to fiddle with things.
> - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
> -
> - return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
> - CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
> - A64cc);
> - }
> -
> - // Note that some LLVM floating-point CondCodes can't be lowered to a single
> - // conditional branch, hence FPCCToA64CC can set a second test, where
> - // passing either one is sufficient.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
> - CmpOp, DAG.getConstant(1, VT),
> - DAG.getConstant(0, VT), A64cc);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
> - }
> -
> - return A64SELECT_CC;
> -}
> -
> -static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) {
> - SDLoc dl(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - SDValue IfTrue = Op.getOperand(2);
> - SDValue IfFalse = Op.getOperand(3);
> - EVT IfTrueVT = IfTrue.getValueType();
> - EVT CondVT = IfTrueVT.changeVectorElementTypeToInteger();
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
> -
> - // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we
> - // will use a NEON compare.
> - if ((LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64)) {
> - EVT EltVT = LHS.getValueType();
> - unsigned EltNum = 128 / EltVT.getSizeInBits();
> - EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum);
> - unsigned SubConstant =
> - (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 : AArch64::sub_64;
> - EVT CEltT = (LHS.getValueType() == MVT::f32) ? MVT::i32 : MVT::i64;
> - EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum);
> -
> - LHS
> - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
> - VT, DAG.getTargetConstant(0, MVT::i32), LHS,
> - DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
> - RHS
> - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
> - VT, DAG.getTargetConstant(0, MVT::i32), RHS,
> - DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
> -
> - SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC);
> - SDValue ResCC = LowerVectorSETCC(VSetCC, DAG);
> - if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) {
> - EVT DUPVT =
> - EVT::getVectorVT(*DAG.getContext(), CEltT,
> - IfTrueVT.getSizeInBits() / CEltT.getSizeInBits());
> - ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC,
> - DAG.getConstant(0, MVT::i64, false));
> -
> - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC);
> - } else {
> - // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function
> - // can't handle them and will hit this assert.
> - assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() &&
> - "Vector of IfTrue & IfFalse is too small.");
> -
> - unsigned ExEltNum =
> - EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits();
> - EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum);
> - ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC,
> - DAG.getConstant(0, MVT::i64, false));
> - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC);
> - }
> - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
> - ResCC, IfTrue, IfFalse);
> - return VSelect;
> - }
> -
> - // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are
> - // vectors.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - EVT SEVT = MVT::i32;
> - if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32)
> - SEVT = MVT::i64;
> - SDValue AllOne = DAG.getConstant(-1, SEVT);
> - SDValue AllZero = DAG.getConstant(0, SEVT);
> - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
> - AllOne, AllZero, A64cc);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
> - SetCC, AllOne, A64SELECT_CC, A64cc);
> - }
> - SDValue VDup;
> - if (IfTrue.getValueType().getVectorNumElements() == 1)
> - VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, CondVT, A64SELECT_CC);
> - else
> - VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, CondVT, A64SELECT_CC);
> - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
> - VDup, IfTrue, IfFalse);
> - return VSelect;
> -}
> -
> -// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
> -SDValue
> -AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc dl(Op);
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - SDValue IfTrue = Op.getOperand(2);
> - SDValue IfFalse = Op.getOperand(3);
> - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
> -
> - if (IfTrue.getValueType().isVector())
> - return LowerVectorSELECT_CC(Op, DAG);
> -
> - if (LHS.getValueType() == MVT::f128) {
> - // f128 comparisons are lowered to libcalls, but slot in nicely here
> - // afterwards.
> - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
> -
> - // If softenSetCCOperands returned a scalar, we need to compare the result
> - // against zero to select between true and false values.
> - if (!RHS.getNode()) {
> - RHS = DAG.getConstant(0, LHS.getValueType());
> - CC = ISD::SETNE;
> - }
> - }
> -
> - if (LHS.getValueType().isInteger()) {
> - SDValue A64cc;
> -
> - // Integers are handled in a separate function because the combinations of
> - // immediates and tests can get hairy and we may want to fiddle with things.
> - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
> -
> - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp,
> - IfTrue, IfFalse, A64cc);
> - }
> -
> - // Note that some LLVM floating-point CondCodes can't be lowered to a single
> - // conditional branch, hence FPCCToA64CC can set a second test, where
> - // passing either one is sufficient.
> - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
> - CondCode = FPCCToA64CC(CC, Alternative);
> - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
> - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
> - DAG.getCondCode(CC));
> - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
> - Op.getValueType(),
> - SetCC, IfTrue, IfFalse, A64cc);
> -
> - if (Alternative != A64CC::Invalid) {
> - A64cc = DAG.getConstant(Alternative, MVT::i32);
> - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
> - SetCC, IfTrue, A64SELECT_CC, A64cc);
> -
> - }
> -
> - return A64SELECT_CC;
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
> - const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
> - const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
> -
> - // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
> - // rather than just 8.
> - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
> - Op.getOperand(1), Op.getOperand(2),
> - DAG.getConstant(32, MVT::i32), 8, false, false,
> - MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
> -}
> -
> -SDValue
> -AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
> - // The layout of the va_list struct is specified in the AArch64 Procedure Call
> - // Standard, section B.3.
> - MachineFunction &MF = DAG.getMachineFunction();
> - AArch64MachineFunctionInfo *FuncInfo
> - = MF.getInfo<AArch64MachineFunctionInfo>();
> - SDLoc DL(Op);
> -
> - SDValue Chain = Op.getOperand(0);
> - SDValue VAList = Op.getOperand(1);
> - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
> - SmallVector<SDValue, 4> MemOps;
> -
> - // void *__stack at offset 0
> - SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
> - getPointerTy());
> - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
> - MachinePointerInfo(SV), false, false, 0));
> -
> - // void *__gr_top at offset 8
> - int GPRSize = FuncInfo->getVariadicGPRSize();
> - if (GPRSize > 0) {
> - SDValue GRTop, GRTopAddr;
> -
> - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(8, getPointerTy()));
> -
> - GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
> - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
> - DAG.getConstant(GPRSize, getPointerTy()));
> -
> - MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
> - MachinePointerInfo(SV, 8),
> - false, false, 0));
> - }
> -
> - // void *__vr_top at offset 16
> - int FPRSize = FuncInfo->getVariadicFPRSize();
> - if (FPRSize > 0) {
> - SDValue VRTop, VRTopAddr;
> - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(16, getPointerTy()));
> -
> - VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
> - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
> - DAG.getConstant(FPRSize, getPointerTy()));
> -
> - MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
> - MachinePointerInfo(SV, 16),
> - false, false, 0));
> - }
> -
> - // int __gr_offs at offset 24
> - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(24, getPointerTy()));
> - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
> - GROffsAddr, MachinePointerInfo(SV, 24),
> - false, false, 0));
> -
> - // int __vr_offs at offset 28
> - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
> - DAG.getConstant(28, getPointerTy()));
> - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
> - VROffsAddr, MachinePointerInfo(SV, 28),
> - false, false, 0));
> -
> - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
> -}
> -
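The offsets stored above, and the 32-byte memcpy in LowerVACOPY, match the AAPCS64 (section B.3) va_list layout, roughly this struct (the struct name is illustrative):

    #include <cstdint>

    struct AArch64VaList {  // 8 + 8 + 8 + 4 + 4 = 32 bytes
      void *__stack;        // offset 0
      void *__gr_top;       // offset 8
      void *__vr_top;       // offset 16
      int32_t __gr_offs;    // offset 24
      int32_t __vr_offs;    // offset 28
    };
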
> -SDValue
> -AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> - switch (Op.getOpcode()) {
> - default: llvm_unreachable("Don't know how to custom lower this!");
> - case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
> - case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
> - case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
> - case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
> - case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
> - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
> - case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
> - case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
> - case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
> - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
> - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
> - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
> -
> - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
> - case ISD::SRL_PARTS:
> - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
> -
> - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
> - case ISD::BRCOND: return LowerBRCOND(Op, DAG);
> - case ISD::BR_CC: return LowerBR_CC(Op, DAG);
> - case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
> - case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
> - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
> - case ISD::JumpTable: return LowerJumpTable(Op, DAG);
> - case ISD::SELECT: return LowerSELECT(Op, DAG);
> - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> - case ISD::SETCC: return LowerSETCC(Op, DAG);
> - case ISD::VACOPY: return LowerVACOPY(Op, DAG);
> - case ISD::VASTART: return LowerVASTART(Op, DAG);
> - case ISD::BUILD_VECTOR:
> - return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
> - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
> - case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
> - }
> -
> - return SDValue();
> -}
> -
> -/// Check if the specified splat value corresponds to a valid vector constant
> -/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
> -/// so, return the encoded 8-bit immediate and the OpCmode instruction field
> -/// values.
> -static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
> - unsigned SplatBitSize, SelectionDAG &DAG,
> - bool is128Bits, NeonModImmType type, EVT &VT,
> - unsigned &Imm, unsigned &OpCmode) {
> - switch (SplatBitSize) {
> - default:
> - llvm_unreachable("unexpected size for isNeonModifiedImm");
> - case 8: {
> - if (type != Neon_Mov_Imm)
> - return false;
> - assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
> - // Neon movi per byte: Op=0, Cmode=1110.
> - OpCmode = 0xe;
> - Imm = SplatBits;
> - VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
> - break;
> - }
> - case 16: {
> - // Neon move inst per halfword
> - VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
> - if ((SplatBits & ~0xff) == 0) {
> - // Value = 0x00nn is 0x00nn LSL 0
> - // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
> - // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
> - // Op=x, Cmode=100y
> - Imm = SplatBits;
> - OpCmode = 0x8;
> - break;
> - }
> - if ((SplatBits & ~0xff00) == 0) {
> - // Value = 0xnn00 is 0x00nn LSL 8
> - // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
> - // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
> - // Op=x, Cmode=101x
> - Imm = SplatBits >> 8;
> - OpCmode = 0xa;
> - break;
> - }
> - // can't handle any other
> - return false;
> - }
> -
> - case 32: {
> - // First the LSL variants (some interested instructions can't use MSL).
> -
> - // Neon move instr per word, shift zeros
> - VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
> - if ((SplatBits & ~0xff) == 0) {
> - // Value = 0x000000nn is 0x000000nn LSL 0
> - // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
> - // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
> - // Op=x, Cmode=000x
> - Imm = SplatBits;
> - OpCmode = 0;
> - break;
> - }
> - if ((SplatBits & ~0xff00) == 0) {
> - // Value = 0x0000nn00 is 0x000000nn LSL 8
> - // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
> - // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
> - // Op=x, Cmode=001x
> - Imm = SplatBits >> 8;
> - OpCmode = 0x2;
> - break;
> - }
> - if ((SplatBits & ~0xff0000) == 0) {
> - // Value = 0x00nn0000 is 0x000000nn LSL 16
> - // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
> - // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
> - // Op=x, Cmode=010x
> - Imm = SplatBits >> 16;
> - OpCmode = 0x4;
> - break;
> - }
> - if ((SplatBits & ~0xff000000) == 0) {
> - // Value = 0xnn000000 is 0x000000nn LSL 24
> - // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
> - // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
> - // Op=x, Cmode=011x
> - Imm = SplatBits >> 24;
> - OpCmode = 0x6;
> - break;
> - }
> -
> - // Now the MSL immediates.
> -
> - // Neon move instr per word, shift ones
> - if ((SplatBits & ~0xffff) == 0 &&
> - ((SplatBits | SplatUndef) & 0xff) == 0xff) {
> - // Value = 0x0000nnff is 0x000000nn MSL 8
> - // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
> - // Op=x, Cmode=1100
> - Imm = SplatBits >> 8;
> - OpCmode = 0xc;
> - break;
> - }
> - if ((SplatBits & ~0xffffff) == 0 &&
> - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
> - // Value = 0x00nnffff is 0x000000nn MSL 16
> - // movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
> - // Op=x, Cmode=1101
> - Imm = SplatBits >> 16;
> - OpCmode = 0xd;
> - break;
> - }
> - // can't handle any other
> - return false;
> - }
> -
> - case 64: {
> - if (type != Neon_Mov_Imm)
> - return false;
> - // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
> - // movi Op=1, Cmode=1110.
> - OpCmode = 0x1e;
> - uint64_t BitMask = 0xff;
> - uint64_t Val = 0;
> - unsigned ImmMask = 1;
> - Imm = 0;
> - for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
> - if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
> - Val |= BitMask;
> - Imm |= ImmMask;
> - } else if ((SplatBits & BitMask) != 0) {
> - return false;
> - }
> - BitMask <<= 8;
> - ImmMask <<= 1;
> - }
> - SplatBits = Val;
> - VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
> - break;
> - }
> - }
> -
> - return true;
> -}
> -
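In the 64-bit case the encoded immediate is a per-byte mask that the hardware expands back out, each set bit becoming an 0xff byte. The inverse of the loop above, as a sketch:

    #include <cstdint>

    uint64_t expandByteMask(uint8_t Imm) {
      uint64_t Val = 0;
      for (int ByteNum = 0; ByteNum < 8; ++ByteNum)
        if (Imm & (1u << ByteNum))
          Val |= 0xffULL << (8 * ByteNum); // Imm bit N -> byte N = 0xff
      return Val;
    }
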
> -static SDValue PerformANDCombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> -
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - // We're looking for an SRL/AND pair which forms a UBFX.
> -
> - if (VT != MVT::i32 && VT != MVT::i64)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(N->getOperand(1)))
> - return SDValue();
> -
> - uint64_t TruncMask = N->getConstantOperandVal(1);
> - if (!isMask_64(TruncMask))
> - return SDValue();
> -
> - uint64_t Width = CountPopulation_64(TruncMask);
> - SDValue Shift = N->getOperand(0);
> -
> - if (Shift.getOpcode() != ISD::SRL)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(Shift->getOperand(1)))
> - return SDValue();
> - uint64_t LSB = Shift->getConstantOperandVal(1);
> -
> - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
> - return SDValue();
> -
> - return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
> - DAG.getConstant(LSB, MVT::i64),
> - DAG.getConstant(LSB + Width - 1, MVT::i64));
> -}
> -
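The UBFX node produced above is an unsigned bitfield extract; in C terms it computes exactly the (and (srl x, LSB), mask) pattern being matched. A semantic sketch, not the instruction's immediate encoding:

    #include <cstdint>

    uint64_t ubfx(uint64_t X, unsigned LSB, unsigned Width) {
      uint64_t Mask = Width == 64 ? ~0ULL : ((1ULL << Width) - 1);
      return (X >> LSB) & Mask;
    }
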
> -/// For a true bitfield insert, the bits getting into that contiguous mask
> -/// should come from the low part of an existing value: they must be formed
> -/// from a compatible SHL operation (unless they're already low). This
> -/// function checks that condition and returns the intended least-significant
> -/// bit. If the operation is not a field preparation, -1 is returned.
> -static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
> - SDValue &MaskedVal, uint64_t Mask) {
> - if (!isShiftedMask_64(Mask))
> - return -1;
> -
> - // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
> - // instruction. BFI will do a left-shift by LSB before applying the mask we've
> - // spotted, so in general we should pre-emptively "undo" that by making sure
> - // the incoming bits have had a right-shift applied to them.
> - //
> - // This right shift, however, will combine with existing left/right shifts. In
> - // the simplest case of a completely straight bitfield operation, it will be
> - // expected to completely cancel out with an existing SHL. More complicated
> - // cases (e.g. bitfield to bitfield copy) may still need a real shift before
> - // the BFI.
> -
> - uint64_t LSB = countTrailingZeros(Mask);
> - int64_t ShiftRightRequired = LSB;
> - if (MaskedVal.getOpcode() == ISD::SHL &&
> - isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
> - ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
> - MaskedVal = MaskedVal.getOperand(0);
> - } else if (MaskedVal.getOpcode() == ISD::SRL &&
> - isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
> - ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
> - MaskedVal = MaskedVal.getOperand(0);
> - }
> -
> - if (ShiftRightRequired > 0)
> - MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
> - DAG.getConstant(ShiftRightRequired, MVT::i64));
> - else if (ShiftRightRequired < 0) {
> - // We could actually end up with a residual left shift, for example with
> - // "struc.bitfield = val << 1".
> - MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
> - DAG.getConstant(-ShiftRightRequired, MVT::i64));
> - }
> -
> - return LSB;
> -}
> -
> -/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
> -/// a mask and an extension. Returns true if a BFI was found and provides
> -/// information on its surroundings.
> -static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
> - bool &Extended) {
> - Extended = false;
> - if (N.getOpcode() == ISD::ZERO_EXTEND) {
> - Extended = true;
> - N = N.getOperand(0);
> - }
> -
> - if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
> - Mask = N->getConstantOperandVal(1);
> - N = N.getOperand(0);
> - } else {
> - // Mask is the whole width.
> - Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
> - }
> -
> - if (N.getOpcode() == AArch64ISD::BFI) {
> - BFI = N;
> - return true;
> - }
> -
> - return false;
> -}
> -
> -/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
> -/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
> -/// can often be further combined with a larger mask. Ultimately, we want mask
> -/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
> -static SDValue tryCombineToBFI(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *Subtarget) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - assert(N->getOpcode() == ISD::OR && "Unexpected root");
> -
> - // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
> - // abandon the effort.
> - SDValue LHS = N->getOperand(0);
> - if (LHS.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - uint64_t LHSMask;
> - if (isa<ConstantSDNode>(LHS.getOperand(1)))
> - LHSMask = LHS->getConstantOperandVal(1);
> - else
> - return SDValue();
> -
> - // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
> - // is or abandon the effort.
> - SDValue RHS = N->getOperand(1);
> - if (RHS.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - uint64_t RHSMask;
> - if (isa<ConstantSDNode>(RHS.getOperand(1)))
> - RHSMask = RHS->getConstantOperandVal(1);
> - else
> - return SDValue();
> -
> - // Can't do anything if the masks are incompatible.
> - if (LHSMask & RHSMask)
> - return SDValue();
> -
> - // Now we need one of the masks to be a contiguous field. Without loss of
> - // generality that should be the RHS one.
> - SDValue Bitfield = LHS.getOperand(0);
> - if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
> - // We know that LHS is a candidate new value, and RHS isn't already a better
> - // one.
> - std::swap(LHS, RHS);
> - std::swap(LHSMask, RHSMask);
> - }
> -
> - // We've done our best to put the right operands in the right places, all we
> - // can do now is check whether a BFI exists.
> - Bitfield = RHS.getOperand(0);
> - int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
> - if (LSB == -1)
> - return SDValue();
> -
> - uint32_t Width = CountPopulation_64(RHSMask);
> - assert(Width && "Expected non-zero bitfield width");
> -
> - SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
> - LHS.getOperand(0), Bitfield,
> - DAG.getConstant(LSB, MVT::i64),
> - DAG.getConstant(Width, MVT::i64));
> -
> - // Mask is trivial
> - if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
> - return BFI;
> -
> - return DAG.getNode(ISD::AND, DL, VT, BFI,
> - DAG.getConstant(LHSMask | RHSMask, VT));
> -}
> -
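For reference, the BFI node built here behaves as a masked insert: the low Width bits of the second value replace the field starting at LSB in the first. A semantic sketch:

    #include <cstdint>

    uint64_t bfi(uint64_t Dst, uint64_t Src, unsigned LSB, unsigned Width) {
      uint64_t Field = (Width == 64 ? ~0ULL : ((1ULL << Width) - 1)) << LSB;
      return (Dst & ~Field) | ((Src << LSB) & Field);
    }
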
> -/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
> -/// original input. This is surprisingly common because SROA splits things up
> -/// into i8 chunks, so the originally detected MaskedBFI may actually only act
> -/// on the low (say) byte of a word. This is then ORed into the rest of the
> -/// word afterwards.
> -///
> -/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
> -///
> -/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
> -/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
> -/// involved.
> -static SDValue tryCombineToLargerBFI(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *Subtarget) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - // First job is to hunt for a MaskedBFI on either the left or right. Swap
> - // operands if it's actually on the right.
> - SDValue BFI;
> - SDValue PossExtraMask;
> - uint64_t ExistingMask = 0;
> - bool Extended = false;
> - if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
> - PossExtraMask = N->getOperand(1);
> - else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
> - PossExtraMask = N->getOperand(0);
> - else
> - return SDValue();
> -
> - // We can only combine a BFI with another compatible mask.
> - if (PossExtraMask.getOpcode() != ISD::AND ||
> - !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
> - return SDValue();
> -
> - uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
> -
> - // Masks must be compatible.
> - if (ExtraMask & ExistingMask)
> - return SDValue();
> -
> - SDValue OldBFIVal = BFI.getOperand(0);
> - SDValue NewBFIVal = BFI.getOperand(1);
> - if (Extended) {
> - // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
> - // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
> - // need to be made compatible.
> - assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
> - && "Invalid types for BFI");
> - OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
> - NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
> - }
> -
> - // We need the MaskedBFI to be combined with a mask of the *same* value.
> - if (PossExtraMask.getOperand(0) != OldBFIVal)
> - return SDValue();
> -
> - BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
> - OldBFIVal, NewBFIVal,
> - BFI.getOperand(2), BFI.getOperand(3));
> -
> - // If the masking is trivial, we don't need to create it.
> - if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
> - return BFI;
> -
> - return DAG.getNode(ISD::AND, DL, VT, BFI,
> - DAG.getConstant(ExtraMask | ExistingMask, VT));
> -}
> -
> -/// An EXTR instruction is made up of two shifts, ORed together. This helper
> -/// searches for and classifies those shifts.
> -static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
> - bool &FromHi) {
> - if (N.getOpcode() == ISD::SHL)
> - FromHi = false;
> - else if (N.getOpcode() == ISD::SRL)
> - FromHi = true;
> - else
> - return false;
> -
> - if (!isa<ConstantSDNode>(N.getOperand(1)))
> - return false;
> -
> - ShiftAmount = N->getConstantOperandVal(1);
> - Src = N->getOperand(0);
> - return true;
> -}
> -
> -/// EXTR instruction extracts a contiguous chunk of bits from two existing
> -/// registers viewed as a high/low pair. This function looks for the pattern:
> -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
> -/// EXTR. Can't quite be done in TableGen because the two immediates aren't
> -/// independent.
> -static SDValue tryCombineToEXTR(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - assert(N->getOpcode() == ISD::OR && "Unexpected root");
> -
> - if (VT != MVT::i32 && VT != MVT::i64)
> - return SDValue();
> -
> - SDValue LHS;
> - uint32_t ShiftLHS = 0;
> - bool LHSFromHi = false;
> - if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
> - return SDValue();
> -
> - SDValue RHS;
> - uint32_t ShiftRHS = 0;
> - bool RHSFromHi = false;
> - if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
> - return SDValue();
> -
> - // If they're both trying to come from the high part of the register, they're
> - // not really an EXTR.
> - if (LHSFromHi == RHSFromHi)
> - return SDValue();
> -
> - if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
> - return SDValue();
> -
> - if (LHSFromHi) {
> - std::swap(LHS, RHS);
> - std::swap(ShiftLHS, ShiftRHS);
> - }
> -
> - return DAG.getNode(AArch64ISD::EXTR, DL, VT,
> - LHS, RHS,
> - DAG.getConstant(ShiftRHS, MVT::i64));
> -}
> -
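EXTR's semantics explain the ShiftLHS + ShiftRHS == register-width check: the result is a register-sized window into the concatenation hi:lo. Sketch for the 64-bit case:

    #include <cstdint>

    // extr(Hi, Lo, LSB) == bits [LSB+63 : LSB] of the 128-bit value Hi:Lo.
    uint64_t extr(uint64_t Hi, uint64_t Lo, unsigned LSB) {
      return LSB == 0 ? Lo : (Hi << (64 - LSB)) | (Lo >> LSB);
    }
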
> -/// Target-specific dag combine xforms for ISD::OR
> -static SDValue PerformORCombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *Subtarget) {
> -
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
> - return SDValue();
> -
> - // Attempt to recognise bitfield-insert operations.
> - SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
> - if (Res.getNode())
> - return Res;
> -
> - // Attempt to combine an existing MaskedBFI operation into one with a larger
> - // mask.
> - Res = tryCombineToLargerBFI(N, DCI, Subtarget);
> - if (Res.getNode())
> - return Res;
> -
> - Res = tryCombineToEXTR(N, DCI);
> - if (Res.getNode())
> - return Res;
> -
> - if (!Subtarget->hasNEON())
> - return SDValue();
> -
> - // Attempt to use vector immediate-form BSL
> - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
> -
> - SDValue N0 = N->getOperand(0);
> - if (N0.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - SDValue N1 = N->getOperand(1);
> - if (N1.getOpcode() != ISD::AND)
> - return SDValue();
> -
> - if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
> - APInt SplatUndef;
> - unsigned SplatBitSize;
> - bool HasAnyUndefs;
> - BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
> - APInt SplatBits0;
> - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
> - HasAnyUndefs) &&
> - !HasAnyUndefs) {
> - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
> - APInt SplatBits1;
> - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
> - HasAnyUndefs) && !HasAnyUndefs &&
> - SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
> - SplatBits0 == ~SplatBits1) {
> -
> - return DAG.getNode(ISD::VSELECT, DL, VT, N0->getOperand(1),
> - N0->getOperand(0), N1->getOperand(0));
> - }
> - }
> - }
> -
> - return SDValue();
> -}
> -
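The VSELECT formed at the end is Neon's bitwise select (BSL): each set bit of the mask takes the corresponding bit from the first operand, each clear bit from the second. Per-word sketch of the pattern in the comment:

    #include <cstdint>

    // (or (and B, A), (and C, ~A)): A's one-bits pick B, zero-bits pick C.
    uint64_t bsl(uint64_t A, uint64_t B, uint64_t C) {
      return (B & A) | (C & ~A);
    }
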
> -/// Target-specific dag combine xforms for ISD::SRA
> -static SDValue PerformSRACombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> -
> - SelectionDAG &DAG = DCI.DAG;
> - SDLoc DL(N);
> - EVT VT = N->getValueType(0);
> -
> - // We're looking for an SRA/SHL pair which forms an SBFX.
> -
> - if (VT != MVT::i32 && VT != MVT::i64)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(N->getOperand(1)))
> - return SDValue();
> -
> - uint64_t ExtraSignBits = N->getConstantOperandVal(1);
> - SDValue Shift = N->getOperand(0);
> -
> - if (Shift.getOpcode() != ISD::SHL)
> - return SDValue();
> -
> - if (!isa<ConstantSDNode>(Shift->getOperand(1)))
> - return SDValue();
> -
> - uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
> - uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
> - uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
> -
> - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
> - return SDValue();
> -
> - return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
> - DAG.getConstant(LSB, MVT::i64),
> - DAG.getConstant(LSB + Width - 1, MVT::i64));
> -}
> -
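SBFX is the signed counterpart of UBFX: the selected field is moved down to bit 0 and sign-extended. The SHL/SRA pair matched above is the classic in-register sign-extension idiom; a sketch of the equivalence for the i64 case, assuming ExtraSignBits >= BitsOnLeft:

    #include <cstdint>

    // sbfx(X, LSB, Width): sign-extend the Width-bit field starting at LSB.
    int64_t sbfx(uint64_t X, unsigned LSB, unsigned Width) {
      return (int64_t)(X << (64 - LSB - Width)) >> (64 - Width);
    }
    // (sra (shl X, B), E) == sbfx(X, E - B, 64 - E), matching the
    // Width/LSB arithmetic in the combine above.
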
> -/// Check if this is a valid build_vector for the immediate operand of
> -/// a vector shift operation, where all the elements of the build_vector
> -/// must have the same constant integer value.
> -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
> - // Ignore bit_converts.
> - while (Op.getOpcode() == ISD::BITCAST)
> - Op = Op.getOperand(0);
> - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
> - APInt SplatBits, SplatUndef;
> - unsigned SplatBitSize;
> - bool HasAnyUndefs;
> - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
> - HasAnyUndefs, ElementBits) ||
> - SplatBitSize > ElementBits)
> - return false;
> - Cnt = SplatBits.getSExtValue();
> - return true;
> -}
> -
> -/// Check if this is a valid build_vector for the immediate operand of
> -/// a vector shift left operation. That value must be in the range:
> -/// 0 <= Value < ElementBits
> -static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
> - assert(VT.isVector() && "vector shift count is not a vector type");
> - unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
> - if (!getVShiftImm(Op, ElementBits, Cnt))
> - return false;
> - return (Cnt >= 0 && Cnt < ElementBits);
> -}
> -
> -/// Check if this is a valid build_vector for the immediate operand of a
> -/// vector shift right operation. The value must be in the range:
> -/// 1 <= Value <= ElementBits
> -static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
> - assert(VT.isVector() && "vector shift count is not a vector type");
> - unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
> - if (!getVShiftImm(Op, ElementBits, Cnt))
> - return false;
> - return (Cnt >= 1 && Cnt <= ElementBits);
> -}
> -
> -static SDValue GenForSextInreg(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - EVT SrcVT, EVT DestVT, EVT SubRegVT,
> - const int *Mask, SDValue Src) {
> - SelectionDAG &DAG = DCI.DAG;
> - SDValue Bitcast
> - = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src);
> - SDValue Sext
> - = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast);
> - SDValue ShuffleVec
> - = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask);
> - SDValue ExtractSubreg
> - = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N),
> - SubRegVT, ShuffleVec,
> - DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0);
> - return ExtractSubreg;
> -}
> -
> -/// Checks for vector shifts and lowers them.
> -static SDValue PerformShiftCombine(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI,
> - const AArch64Subtarget *ST) {
> - SelectionDAG &DAG = DCI.DAG;
> - EVT VT = N->getValueType(0);
> - if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
> - return PerformSRACombine(N, DCI);
> -
> - // We're looking for an SRA/SHL pair to help generate the instruction
> - // sshll v0.8h, v0.8b, #0
> - // The instruction SXTL is also an alias of this instruction.
> - //
> - // For example, for DAG like below,
> - // v2i32 = sra (v2i32 (shl v2i32, 16)), 16
> - // we can transform it into
> - // v2i32 = EXTRACT_SUBREG
> - // (v4i32 (shuffle_vector
> - // (v4i32 (sext (v4i16 (bitcast v2i32))),
> - // undef, (0, 2, u, u)),
> - // sub_64
> - //
> - // With this transformation we expect to generate "SSHLL + UZIP1"
> - // Sometimes the UZIP1 can be optimized away by combining with surrounding context.
> - int64_t ShrCnt, ShlCnt;
> - if (N->getOpcode() == ISD::SRA
> - && (VT == MVT::v2i32 || VT == MVT::v4i16)
> - && isVShiftRImm(N->getOperand(1), VT, ShrCnt)
> - && N->getOperand(0).getOpcode() == ISD::SHL
> - && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) {
> - SDValue Src = N->getOperand(0).getOperand(0);
> - if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) {
> - // sext_inreg(v2i32, v2i16)
> - // We essentially only care about the Mask {0, 2, u, u}
> - int Mask[4] = {0, 2, 4, 6};
> - return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32,
> - Mask, Src);
> - }
> - else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) {
> - // sext_inreg(v2i32, v2i8)
> - // We essentially only care about the Mask {0, u, 4, u, u, u, u, u, u, u, u, u}
> - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
> - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32,
> - Mask, Src);
> - }
> - else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) {
> - // sext_inreg(v4i16, v4i8)
> - // We essentially only care about the Mask {0, 2, 4, 6, u, u, u, u, u, u, u, u}
> - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
> - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16,
> - Mask, Src);
> - }
> - }
> -
> - // Nothing to be done for scalar shifts.
> - const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> - if (!VT.isVector() || !TLI.isTypeLegal(VT))
> - return SDValue();
> -
> - assert(ST->hasNEON() && "unexpected vector shift");
> - int64_t Cnt;
> -
> - switch (N->getOpcode()) {
> - default:
> - llvm_unreachable("unexpected shift opcode");
> -
> - case ISD::SHL:
> - if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
> - SDValue RHS =
> - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
> - DAG.getConstant(Cnt, MVT::i32));
> - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
> - }
> - break;
> -
> - case ISD::SRA:
> - case ISD::SRL:
> - if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
> - SDValue RHS =
> - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
> - DAG.getConstant(Cnt, MVT::i32));
> - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
> - }
> - break;
> - }
> -
> - return SDValue();
> -}
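For context, the pattern this combine chased is easiest to see in source
form. A minimal C++ sketch (illustrative only, not from the commit; the
function name is made up): sign-extending the low half of each lane with a
shift-left/arithmetic-shift-right pair is exactly the "sra (shl x, 16), 16"
DAG that the code above rewrote to SSHLL + UZP1.

  #include <cstdint>

  // Keep only the low 16 bits of each lane, sign-extended to 32 bits.
  void sext_inreg_v2i32(int32_t v[2]) {
    for (int i = 0; i < 2; ++i)
      v[i] = (int32_t)((uint32_t)v[i] << 16) >> 16;
  }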
> -
> -/// AArch64-specific DAG combining for intrinsics.
> -static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
> - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
> -
> - switch (IntNo) {
> - default:
> - // Don't do anything for most intrinsics.
> - break;
> -
> - case Intrinsic::arm_neon_vqshifts:
> - case Intrinsic::arm_neon_vqshiftu:
> - EVT VT = N->getOperand(1).getValueType();
> - int64_t Cnt;
> - if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
> - break;
> - unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
> - ? AArch64ISD::NEON_QSHLs
> - : AArch64ISD::NEON_QSHLu;
> - return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
> - N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
> - }
> -
> - return SDValue();
> -}
> -
> -/// Target-specific DAG combine function for NEON load/store intrinsics
> -/// to merge base address updates.
> -static SDValue CombineBaseUpdate(SDNode *N,
> - TargetLowering::DAGCombinerInfo &DCI) {
> - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
> - return SDValue();
> -
> - SelectionDAG &DAG = DCI.DAG;
> - bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
> - N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
> - unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
> - SDValue Addr = N->getOperand(AddrOpIdx);
> -
> - // Search for a use of the address operand that is an increment.
> - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
> - UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
> - SDNode *User = *UI;
> - if (User->getOpcode() != ISD::ADD ||
> - UI.getUse().getResNo() != Addr.getResNo())
> - continue;
> -
> - // Check that the add is independent of the load/store. Otherwise, folding
> - // it would create a cycle.
> - if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
> - continue;
> -
> - // Find the new opcode for the updating load/store.
> - bool isLoad = true;
> - bool isLaneOp = false;
> - unsigned NewOpc = 0;
> - unsigned NumVecs = 0;
> - if (isIntrinsic) {
> - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
> - switch (IntNo) {
> - default: llvm_unreachable("unexpected intrinsic for Neon base update");
> - case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD;
> - NumVecs = 1; break;
> - case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD;
> - NumVecs = 2; break;
> - case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD;
> - NumVecs = 3; break;
> - case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD;
> - NumVecs = 4; break;
> - case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD;
> - NumVecs = 1; isLoad = false; break;
> - case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD;
> - NumVecs = 2; isLoad = false; break;
> - case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD;
> - NumVecs = 3; isLoad = false; break;
> - case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD;
> - NumVecs = 4; isLoad = false; break;
> - case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD;
> - NumVecs = 2; break;
> - case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD;
> - NumVecs = 3; break;
> - case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD;
> - NumVecs = 4; break;
> - case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD;
> - NumVecs = 2; isLoad = false; break;
> - case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD;
> - NumVecs = 3; isLoad = false; break;
> - case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD;
> - NumVecs = 4; isLoad = false; break;
> - case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD;
> - NumVecs = 2; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD;
> - NumVecs = 3; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD;
> - NumVecs = 4; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD;
> - NumVecs = 2; isLoad = false; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD;
> - NumVecs = 3; isLoad = false; isLaneOp = true; break;
> - case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD;
> - NumVecs = 4; isLoad = false; isLaneOp = true; break;
> - }
> - } else {
> - isLaneOp = true;
> - switch (N->getOpcode()) {
> - default: llvm_unreachable("unexpected opcode for Neon base update");
> - case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD;
> - NumVecs = 2; break;
> - case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD;
> - NumVecs = 3; break;
> - case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD;
> - NumVecs = 4; break;
> - }
> - }
> -
> - // Find the size of memory referenced by the load/store.
> - EVT VecTy;
> - if (isLoad)
> - VecTy = N->getValueType(0);
> - else
> - VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
> - unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
> - if (isLaneOp)
> - NumBytes /= VecTy.getVectorNumElements();
> -
> - // If the increment is a constant, it must match the memory ref size.
> - SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
> - if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
> - uint32_t IncVal = CInc->getZExtValue();
> - if (IncVal != NumBytes)
> - continue;
> - Inc = DAG.getTargetConstant(IncVal, MVT::i32);
> - }
> -
> - // Create the new updating load/store node.
> - EVT Tys[6];
> - unsigned NumResultVecs = (isLoad ? NumVecs : 0);
> - unsigned n;
> - for (n = 0; n < NumResultVecs; ++n)
> - Tys[n] = VecTy;
> - Tys[n++] = MVT::i64;
> - Tys[n] = MVT::Other;
> - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));
> - SmallVector<SDValue, 8> Ops;
> - Ops.push_back(N->getOperand(0)); // incoming chain
> - Ops.push_back(N->getOperand(AddrOpIdx));
> - Ops.push_back(Inc);
> - for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
> - Ops.push_back(N->getOperand(i));
> - }
> - MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
> - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
> - Ops, MemInt->getMemoryVT(),
> - MemInt->getMemOperand());
> -
> - // Update the uses.
> - std::vector<SDValue> NewResults;
> - for (unsigned i = 0; i < NumResultVecs; ++i) {
> - NewResults.push_back(SDValue(UpdN.getNode(), i));
> - }
> - NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
> - DCI.CombineTo(N, NewResults);
> - DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
> -
> - break;
> - }
> - return SDValue();
> -}
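The access pattern CombineBaseUpdate matched is clearer at the intrinsics
level. A hedged sketch, assuming <arm_neon.h> (the function and its name
are illustrative): a structured load whose pointer is then advanced by
exactly the number of bytes transferred, which is what allows the ADD to
fold into a post-indexed ld2.

  #include <arm_neon.h>

  // vld2q_s32 transfers 2 x 16 bytes; advancing p by 8 int32_t elements
  // (32 bytes) matches NumBytes, so the add can become a post-indexed
  // base update.
  int32x4x2_t load_and_advance(const int32_t *&p) {
    int32x4x2_t v = vld2q_s32(p);
    p += 8;
    return v;
  }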
> -
> -/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1)
> -/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs.
> -/// If so, combine them to a vldN-dup operation and return true.
> -static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
> - SelectionDAG &DAG = DCI.DAG;
> - EVT VT = N->getValueType(0);
> -
> - // Check if the VDUPLANE operand is a vldN-dup intrinsic.
> - SDNode *VLD = N->getOperand(0).getNode();
> - if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
> - return SDValue();
> - unsigned NumVecs = 0;
> - unsigned NewOpc = 0;
> - unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
> - if (IntNo == Intrinsic::arm_neon_vld2lane) {
> - NumVecs = 2;
> - NewOpc = AArch64ISD::NEON_LD2DUP;
> - } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
> - NumVecs = 3;
> - NewOpc = AArch64ISD::NEON_LD3DUP;
> - } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
> - NumVecs = 4;
> - NewOpc = AArch64ISD::NEON_LD4DUP;
> - } else {
> - return SDValue();
> - }
> -
> - // First check that all the vldN-lane uses are VDUPLANEs and that the lane
> - // numbers match the load.
> - unsigned VLDLaneNo =
> - cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
> - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
> - UI != UE; ++UI) {
> - // Ignore uses of the chain result.
> - if (UI.getUse().getResNo() == NumVecs)
> - continue;
> - SDNode *User = *UI;
> - if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
> - VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
> - return SDValue();
> - }
> -
> - // Create the vldN-dup node.
> - EVT Tys[5];
> - unsigned n;
> - for (n = 0; n < NumVecs; ++n)
> - Tys[n] = VT;
> - Tys[n] = MVT::Other;
> - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs + 1));
> - SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
> - MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
> - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops,
> - VLDMemInt->getMemoryVT(),
> - VLDMemInt->getMemOperand());
> -
> - // Update the uses.
> - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
> - UI != UE; ++UI) {
> - unsigned ResNo = UI.getUse().getResNo();
> - // Ignore uses of the chain result.
> - if (ResNo == NumVecs)
> - continue;
> - SDNode *User = *UI;
> - DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
> - }
> -
> - // Now the vldN-lane intrinsic is dead except for its chain result.
> - // Update uses of the chain.
> - std::vector<SDValue> VLDDupResults;
> - for (unsigned n = 0; n < NumVecs; ++n)
> - VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
> - VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
> - DCI.CombineTo(VLD, VLDDupResults);
> -
> - return SDValue(N, 0);
> -}
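In intrinsic terms, the rewrite corresponds to replacing a lane load whose
every use is a broadcast with the dedicated duplicating load. A rough
sketch (function name illustrative, assumes <arm_neon.h>):

  #include <arm_neon.h>

  // Loading one element of each of two structures and broadcasting it to
  // all lanes is what vld2_dup (the LD2R instruction) does directly.
  int16x4x2_t splat_pair(const int16_t *p) {
    return vld2_dup_s16(p);
  }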
> -
> -// vselect (v1i1 setcc) ->
> -// vselect (v1iXX setcc) (XX is the size of the compared operand type)
> -// FIXME: Currently the type legalizer can't handle VSELECT with a v1i1
> -// condition. Once it can legalize "VSELECT v1i1" correctly, there will be
> -// no need to combine such a VSELECT.
> -static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {
> - SDValue N0 = N->getOperand(0);
> - EVT CCVT = N0.getValueType();
> -
> - if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
> - CCVT.getVectorElementType() != MVT::i1)
> - return SDValue();
> -
> - EVT ResVT = N->getValueType(0);
> - EVT CmpVT = N0.getOperand(0).getValueType();
> - // Only combine when the result type is of the same size as the compared
> - // operands.
> - if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
> - return SDValue();
> -
> - SDValue IfTrue = N->getOperand(1);
> - SDValue IfFalse = N->getOperand(2);
> - SDValue SetCC =
> - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
> - N0.getOperand(0), N0.getOperand(1),
> - cast<CondCodeSDNode>(N0.getOperand(2))->get());
> - return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
> - IfTrue, IfFalse);
> -}
> -
> -// sign_extend (extract_vector_elt (v1i1 setcc)) ->
> -// extract_vector_elt (v1iXX setcc)
> -// (XX is the size of the compared operand type)
> -static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) {
> - SDValue N0 = N->getOperand(0);
> - SDValue Vec = N0.getOperand(0);
> -
> - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
> - Vec.getOpcode() != ISD::SETCC)
> - return SDValue();
> -
> - EVT ResVT = N->getValueType(0);
> - EVT CmpVT = Vec.getOperand(0).getValueType();
> - // Only optimize when the result type is of the same size as the element
> - // type of the compared operand.
> - if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits())
> - return SDValue();
> -
> - SDValue Lane = N0.getOperand(1);
> - SDValue SetCC =
> - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
> - Vec.getOperand(0), Vec.getOperand(1),
> - cast<CondCodeSDNode>(Vec.getOperand(2))->get());
> - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT,
> - SetCC, Lane);
> -}
> -
> -SDValue
> -AArch64TargetLowering::PerformDAGCombine(SDNode *N,
> - DAGCombinerInfo &DCI) const {
> - switch (N->getOpcode()) {
> - default: break;
> - case ISD::AND: return PerformANDCombine(N, DCI);
> - case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
> - case ISD::SHL:
> - case ISD::SRA:
> - case ISD::SRL:
> - return PerformShiftCombine(N, DCI, getSubtarget());
> - case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
> - case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
> - case ISD::INTRINSIC_WO_CHAIN:
> - return PerformIntrinsicCombine(N, DCI.DAG);
> - case AArch64ISD::NEON_VDUPLANE:
> - return CombineVLDDUP(N, DCI);
> - case AArch64ISD::NEON_LD2DUP:
> - case AArch64ISD::NEON_LD3DUP:
> - case AArch64ISD::NEON_LD4DUP:
> - return CombineBaseUpdate(N, DCI);
> - case ISD::INTRINSIC_VOID:
> - case ISD::INTRINSIC_W_CHAIN:
> - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
> - case Intrinsic::arm_neon_vld1:
> - case Intrinsic::arm_neon_vld2:
> - case Intrinsic::arm_neon_vld3:
> - case Intrinsic::arm_neon_vld4:
> - case Intrinsic::arm_neon_vst1:
> - case Intrinsic::arm_neon_vst2:
> - case Intrinsic::arm_neon_vst3:
> - case Intrinsic::arm_neon_vst4:
> - case Intrinsic::arm_neon_vld2lane:
> - case Intrinsic::arm_neon_vld3lane:
> - case Intrinsic::arm_neon_vld4lane:
> - case Intrinsic::aarch64_neon_vld1x2:
> - case Intrinsic::aarch64_neon_vld1x3:
> - case Intrinsic::aarch64_neon_vld1x4:
> - case Intrinsic::aarch64_neon_vst1x2:
> - case Intrinsic::aarch64_neon_vst1x3:
> - case Intrinsic::aarch64_neon_vst1x4:
> - case Intrinsic::arm_neon_vst2lane:
> - case Intrinsic::arm_neon_vst3lane:
> - case Intrinsic::arm_neon_vst4lane:
> - return CombineBaseUpdate(N, DCI);
> - default:
> - break;
> - }
> - }
> - return SDValue();
> -}
> -
> -bool
> -AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
> - VT = VT.getScalarType();
> -
> - if (!VT.isSimple())
> - return false;
> -
> - switch (VT.getSimpleVT().SimpleTy) {
> - case MVT::f16:
> - case MVT::f32:
> - case MVT::f64:
> - return true;
> - case MVT::f128:
> - return false;
> - default:
> - break;
> - }
> -
> - return false;
> -}
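This hook is what lets the DAG combiner contract a separate multiply and
add into a fused multiply-add for f16/f32/f64, while refusing for f128,
which has no FMADD instruction. A minimal sketch of code that benefits
(illustrative; contraction also depends on the fp-contract settings in
effect):

  // With fast FMA, x * y + z can become a single fmadd instead of
  // fmul + fadd.
  double mac(double x, double y, double z) { return x * y + z; }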
> -
> -bool AArch64TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
> - unsigned AddrSpace,
> - bool *Fast) const {
> - const AArch64Subtarget *Subtarget = getSubtarget();
> - // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
> - bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
> -
> - switch (VT.getSimpleVT().SimpleTy) {
> - default:
> - return false;
> - // Scalar types
> - case MVT::i8: case MVT::i16:
> - case MVT::i32: case MVT::i64:
> - case MVT::f32: case MVT::f64: {
> - // Unaligned accesses can use (for example) LDRB, LDRH, LDRW
> - if (AllowsUnaligned) {
> - if (Fast)
> - *Fast = true;
> - return true;
> - }
> - return false;
> - }
> - // 64-bit vector types
> - case MVT::v8i8: case MVT::v4i16:
> - case MVT::v2i32: case MVT::v1i64:
> - case MVT::v2f32: case MVT::v1f64:
> - // 128-bit vector types
> - case MVT::v16i8: case MVT::v8i16:
> - case MVT::v4i32: case MVT::v2i64:
> - case MVT::v4f32: case MVT::v2f64: {
> - // For any little-endian target with NEON, we can support unaligned
> - // loads/stores of V registers using ld1/st1.
> - // A big-endian target may also explicitly support unaligned accesses.
> - if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
> - if (Fast)
> - *Fast = true;
> - return true;
> - }
> - return false;
> - }
> - }
> -}
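A sketch of how a client might consult this hook before choosing a wide
access (hedged: canUseWideCopy is a made-up helper, and the include
assumes the LLVM tree layout of this era):

  #include "llvm/Target/TargetLowering.h"

  // Ask the target whether an unaligned 128-bit access is both legal and
  // fast before emitting it, e.g. when lowering a memcpy.
  bool canUseWideCopy(const llvm::TargetLowering &TLI) {
    bool Fast = false;
    return TLI.allowsUnalignedMemoryAccesses(llvm::MVT::v2i64, 0, &Fast) &&
           Fast;
  }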
> -
> -// Check whether a shuffle_vector could be represented as a concat_vector.
> -bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG,
> - SDValue V0, SDValue V1,
> - const int *Mask,
> - SDValue &Res) const {
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> - if (VT.getSizeInBits() != 128)
> - return false;
> - if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
> - VT.getVectorElementType() != V1.getValueType().getVectorElementType())
> - return false;
> -
> - unsigned NumElts = VT.getVectorNumElements();
> - bool IsConcatVector = true;
> - bool splitV0 = false;
> - if (V0.getValueType().getSizeInBits() == 128)
> - splitV0 = true;
> -
> - for (int I = 0, E = NumElts / 2; I != E; I++) {
> - if (Mask[I] != I) {
> - IsConcatVector = false;
> - break;
> - }
> - }
> -
> - if (IsConcatVector) {
> - int offset = NumElts / 2;
> - for (int I = NumElts / 2, E = NumElts; I != E; I++) {
> - if (Mask[I] != I + splitV0 * offset) {
> - IsConcatVector = false;
> - break;
> - }
> - }
> - }
> -
> - if (IsConcatVector) {
> - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
> - NumElts / 2);
> - if (splitV0) {
> - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
> - DAG.getConstant(0, MVT::i64));
> - }
> - if (V1.getValueType().getSizeInBits() == 128) {
> - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
> - DAG.getConstant(0, MVT::i64));
> - }
> - Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
> - return true;
> - }
> - return false;
> -}
> -
> -// Check whether a Build Vector could be represented as a Shuffle Vector.
> -// This Shuffle Vector may not be legalized, so the lengths of its operands
> -// and of its result may not be equal.
> -bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
> - SDValue &V0, SDValue &V1,
> - int *Mask) const {
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> - unsigned NumElts = VT.getVectorNumElements();
> - unsigned V0NumElts = 0;
> -
> - // Check that all elements are extracted from at most two vectors.
> - for (unsigned i = 0; i < NumElts; ++i) {
> - SDValue Elt = Op.getOperand(i);
> - if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
> - Elt.getOperand(0).getValueType().getVectorElementType() !=
> - VT.getVectorElementType())
> - return false;
> -
> - if (!V0.getNode()) {
> - V0 = Elt.getOperand(0);
> - V0NumElts = V0.getValueType().getVectorNumElements();
> - }
> - if (Elt.getOperand(0) == V0) {
> - Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
> - continue;
> - } else if (!V1.getNode()) {
> - V1 = Elt.getOperand(0);
> - }
> - if (Elt.getOperand(0) == V1) {
> - unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
> - Mask[i] = (Lane + V0NumElts);
> - continue;
> - } else {
> - return false;
> - }
> - }
> - return true;
> -}
> -
> -/// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which return two
> -/// i64 values and take a 2 x i64 value to shift plus a shift amount.
> -SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(Op.getNumOperands() == 3 && "Not a quad-shift!");
> - EVT VT = Op.getValueType();
> - unsigned VTBits = VT.getSizeInBits();
> - SDLoc dl(Op);
> - SDValue ShOpLo = Op.getOperand(0);
> - SDValue ShOpHi = Op.getOperand(1);
> - SDValue ShAmt = Op.getOperand(2);
> - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
> -
> - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
> - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
> - DAG.getConstant(VTBits, MVT::i64), ShAmt);
> - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
> - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
> - DAG.getConstant(VTBits, MVT::i64));
> - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
> - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
> - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
> - SDValue Tmp3 = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
> -
> - SDValue A64cc;
> - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt,
> - DAG.getConstant(0, MVT::i64),
> - ISD::SETGE, A64cc,
> - DAG, dl);
> -
> - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - DAG.getConstant(0, Tmp3.getValueType()), Tmp3,
> - A64cc);
> - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - TrueVal, FalseVal, A64cc);
> -
> - SDValue Ops[2] = { Lo, Hi };
> - return DAG.getMergeValues(Ops, dl);
> -}
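As a plain C++ model of what the selected code computes (a sketch of the
SRL_PARTS semantics only, not of the DAG lowering itself): once the shift
amount reaches 64 the low result comes entirely from the high word,
otherwise it is the OR of both partial shifts.

  #include <cstdint>

  // Low 64-bit result of a 128-bit logical right shift, mirroring the
  // TrueVal/FalseVal select above. Assumes 0 <= Amt < 128; SRA_PARTS
  // differs only in how the high bits are filled.
  uint64_t srl_parts_lo(uint64_t Lo, uint64_t Hi, unsigned Amt) {
    if (Amt == 0)
      return Lo;
    if (Amt >= 64)
      return Hi >> (Amt - 64);                 // the "TrueVal" path
    return (Lo >> Amt) | (Hi << (64 - Amt));   // the "FalseVal" path
  }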
> -
> -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
> -/// i64 values and take a 2 x i64 value to shift plus a shift amount.
> -SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
> - SelectionDAG &DAG) const {
> - assert(Op.getNumOperands() == 3 && "Not a quad-shift!");
> - EVT VT = Op.getValueType();
> - unsigned VTBits = VT.getSizeInBits();
> - SDLoc dl(Op);
> - SDValue ShOpLo = Op.getOperand(0);
> - SDValue ShOpHi = Op.getOperand(1);
> - SDValue ShAmt = Op.getOperand(2);
> -
> - assert(Op.getOpcode() == ISD::SHL_PARTS);
> - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
> - DAG.getConstant(VTBits, MVT::i64), ShAmt);
> - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
> - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
> - DAG.getConstant(VTBits, MVT::i64));
> - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
> - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
> - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
> - SDValue Tmp4 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
> -
> - SDValue A64cc;
> - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt,
> - DAG.getConstant(0, MVT::i64),
> - ISD::SETGE, A64cc,
> - DAG, dl);
> -
> - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - DAG.getConstant(0, Tmp4.getValueType()), Tmp4,
> - A64cc);
> - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
> - Tmp3, FalseVal, A64cc);
> -
> - SDValue Ops[2] = { Lo, Hi };
> - return DAG.getMergeValues(Ops, dl);
> -}
> -
> -// If this is a case we can't handle, return null and let the default
> -// expansion code take care of it.
> -SDValue
> -AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
> - const AArch64Subtarget *ST) const {
> -
> - BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> -
> - APInt SplatBits, SplatUndef;
> - unsigned SplatBitSize;
> - bool HasAnyUndefs;
> -
> - bool UseNeonMov = VT.getSizeInBits() >= 64;
> -
> - // Note we favor lowering MOVI over MVNI.
> - // This has implications on the definition of patterns in TableGen to select
> - // BIC immediate instructions but not ORR immediate instructions.
> - // If this lowering order is changed, TableGen patterns for BIC immediate and
> - // ORR immediate instructions have to be updated.
> - if (UseNeonMov &&
> - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
> - if (SplatBitSize <= 64) {
> - // First attempt to use vector immediate-form MOVI
> - EVT NeonMovVT;
> - unsigned Imm = 0;
> - unsigned OpCmode = 0;
> -
> - if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
> - SplatBitSize, DAG, VT.is128BitVector(),
> - Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
> - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
> - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
> -
> - if (ImmVal.getNode() && OpCmodeVal.getNode()) {
> - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
> - ImmVal, OpCmodeVal);
> - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
> - }
> - }
> -
> - // Then attempt to use vector immediate-form MVNI
> - uint64_t NegatedImm = (~SplatBits).getZExtValue();
> - if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
> - DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
> - Imm, OpCmode)) {
> - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
> - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
> - if (ImmVal.getNode() && OpCmodeVal.getNode()) {
> - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
> - ImmVal, OpCmodeVal);
> - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
> - }
> - }
> -
> - // Attempt to use vector immediate-form FMOV
> - if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
> - (VT == MVT::v2f64 && SplatBitSize == 64)) {
> - APFloat RealVal(
> - SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
> - SplatBits);
> - uint32_t ImmVal;
> - if (A64Imms::isFPImm(RealVal, ImmVal)) {
> - SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
> - return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
> - }
> - }
> - }
> - }
> -
> - unsigned NumElts = VT.getVectorNumElements();
> - bool isOnlyLowElement = true;
> - bool usesOnlyOneValue = true;
> - bool hasDominantValue = false;
> - bool isConstant = true;
> -
> - // Map of the number of times a particular SDValue appears in the
> - // element list.
> - DenseMap<SDValue, unsigned> ValueCounts;
> - SDValue Value;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - SDValue V = Op.getOperand(i);
> - if (V.getOpcode() == ISD::UNDEF)
> - continue;
> - if (i > 0)
> - isOnlyLowElement = false;
> - if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
> - isConstant = false;
> -
> - ValueCounts.insert(std::make_pair(V, 0));
> - unsigned &Count = ValueCounts[V];
> -
> - // Is this value dominant? (takes up more than half of the lanes)
> - if (++Count > (NumElts / 2)) {
> - hasDominantValue = true;
> - Value = V;
> - }
> - }
> - if (ValueCounts.size() != 1)
> - usesOnlyOneValue = false;
> - if (!Value.getNode() && ValueCounts.size() > 0)
> - Value = ValueCounts.begin()->first;
> -
> - if (ValueCounts.size() == 0)
> - return DAG.getUNDEF(VT);
> -
> - if (isOnlyLowElement)
> - return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
> -
> - unsigned EltSize = VT.getVectorElementType().getSizeInBits();
> - if (hasDominantValue && EltSize <= 64) {
> - // Use VDUP for non-constant splats.
> - if (!isConstant) {
> - SDValue N;
> -
> - // If we are DUPing a value that comes directly from a vector, we could
> - // just use DUPLANE. We can only do this if the lane being extracted
> - // is at a constant index, as the DUP from lane instructions only have
> - // constant-index forms.
> - //
> - // If there is a TRUNCATE between EXTRACT_VECTOR_ELT and DUP, we can
> - // remove the TRUNCATE for DUPLANE by updating the source vector to the
> - // appropriate vector type and lane index.
> - //
> - // FIXME: for now v1i8, v1i16 and v1i32 are legal vector types; if they
> - // stop being legal, there will be no need to check that the type size
> - // in bits is larger than 64.
> - SDValue V = Value;
> - if (Value->getOpcode() == ISD::TRUNCATE)
> - V = Value->getOperand(0);
> - if (V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
> - isa<ConstantSDNode>(V->getOperand(1)) &&
> - V->getOperand(0).getValueType().getSizeInBits() >= 64) {
> -
> - // If the element size of the source vector is larger than the DUPLANE
> - // element size, we can do the transformation by:
> - // 1) bitcasting the source register to a smaller-element vector;
> - // 2) multiplying the lane index by SrcEltSize/ResEltSize.
> - // For example, we can lower
> - // "v8i16 vdup_lane(v4i32, 1)"
> - // to be
> - // "v8i16 vdup_lane(v8i16 bitcast(v4i32), 2)".
> - SDValue SrcVec = V->getOperand(0);
> - unsigned SrcEltSize =
> - SrcVec.getValueType().getVectorElementType().getSizeInBits();
> - unsigned ResEltSize = VT.getVectorElementType().getSizeInBits();
> - if (SrcEltSize > ResEltSize) {
> - assert((SrcEltSize % ResEltSize == 0) && "Invalid element size");
> - SDValue BitCast;
> - unsigned SrcSize = SrcVec.getValueType().getSizeInBits();
> - unsigned ResSize = VT.getSizeInBits();
> -
> - if (SrcSize > ResSize) {
> - assert((SrcSize % ResSize == 0) && "Invalid vector size");
> - EVT CastVT =
> - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
> - SrcSize / ResEltSize);
> - BitCast = DAG.getNode(ISD::BITCAST, DL, CastVT, SrcVec);
> - } else {
> - assert((SrcSize == ResSize) && "Invalid vector size of source vec");
> - BitCast = DAG.getNode(ISD::BITCAST, DL, VT, SrcVec);
> - }
> -
> - unsigned LaneIdx = V->getConstantOperandVal(1);
> - SDValue Lane =
> - DAG.getConstant((SrcEltSize / ResEltSize) * LaneIdx, MVT::i64);
> - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, BitCast, Lane);
> - } else {
> - assert((SrcEltSize == ResEltSize) &&
> - "Invalid element size of source vec");
> - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, V->getOperand(0),
> - V->getOperand(1));
> - }
> - } else
> - N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
> -
> - if (!usesOnlyOneValue) {
> - // The dominant value was splatted as 'N', but we now have to insert
> - // all differing elements.
> - for (unsigned I = 0; I < NumElts; ++I) {
> - if (Op.getOperand(I) == Value)
> - continue;
> - SmallVector<SDValue, 3> Ops;
> - Ops.push_back(N);
> - Ops.push_back(Op.getOperand(I));
> - Ops.push_back(DAG.getConstant(I, MVT::i64));
> - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Ops);
> - }
> - }
> - return N;
> - }
> - if (usesOnlyOneValue && isConstant) {
> - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
> - }
> - }
> - // If all elements are constants and the case above didn't get hit, fall back
> - // to the default expansion, which will generate a load from the constant
> - // pool.
> - if (isConstant)
> - return SDValue();
> -
> - // Try to lower this in lowering ShuffleVector way.
> - SDValue V0, V1;
> - int Mask[16];
> - if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
> - unsigned V0NumElts = V0.getValueType().getVectorNumElements();
> - if (!V1.getNode() && V0NumElts == NumElts * 2) {
> - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
> - DAG.getConstant(NumElts, MVT::i64));
> - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
> - DAG.getConstant(0, MVT::i64));
> - V0NumElts = V0.getValueType().getVectorNumElements();
> - }
> -
> - if (V1.getNode() && NumElts == V0NumElts &&
> - V0NumElts == V1.getValueType().getVectorNumElements()) {
> - SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
> - if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
> - return Shuffle;
> - else
> - return LowerVECTOR_SHUFFLE(Shuffle, DAG);
> - } else {
> - SDValue Res;
> - if (isConcatVector(Op, DAG, V0, V1, Mask, Res))
> - return Res;
> - }
> - }
> -
> - // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
> - // know the default expansion would otherwise fall back on something even
> - // worse. For a vector with one or two non-undef values, that's
> - // scalar_to_vector for the elements followed by a shuffle (provided the
> - // shuffle is valid for the target) and materialization element by element
> - // on the stack followed by a load for everything else.
> - if (!isConstant && !usesOnlyOneValue) {
> - SDValue Vec = DAG.getUNDEF(VT);
> - for (unsigned i = 0 ; i < NumElts; ++i) {
> - SDValue V = Op.getOperand(i);
> - if (V.getOpcode() == ISD::UNDEF)
> - continue;
> - SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
> - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
> - }
> - return Vec;
> - }
> - return SDValue();
> -}
> -
> -/// isREVMask - Check if a vector shuffle corresponds to a REV
> -/// instruction with the specified blocksize. (The order of the elements
> -/// within each block of the vector is reversed.)
> -static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
> - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
> - "Only possible block sizes for REV are: 16, 32, 64");
> -
> - unsigned EltSz = VT.getVectorElementType().getSizeInBits();
> - if (EltSz == 64)
> - return false;
> -
> - unsigned NumElts = VT.getVectorNumElements();
> - unsigned BlockElts = M[0] + 1;
> - // If the first shuffle index is UNDEF, be optimistic.
> - if (M[0] < 0)
> - BlockElts = BlockSize / EltSz;
> -
> - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
> - return false;
> -
> - for (unsigned i = 0; i < NumElts; ++i) {
> - if (M[i] < 0)
> - continue; // ignore UNDEF indices
> - if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
> - return false;
> - }
> -
> - return true;
> -}
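Concretely, for v8i8 with BlockSize == 32 the accepted mask is the REV32
pattern {3, 2, 1, 0, 7, 6, 5, 4}. The index formula can be checked in
isolation with a small self-contained program (illustrative):

  #include <cassert>

  int main() {
    const unsigned BlockElts = 4;               // BlockSize 32 / EltSz 8
    const int M[8] = {3, 2, 1, 0, 7, 6, 5, 4};  // REV32 on v8i8
    for (unsigned i = 0; i < 8; ++i)
      assert((unsigned)M[i] ==
             (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts));
    return 0;
  }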
> -
> -// isPermuteMask - Check whether the vector shuffle matches a UZP, ZIP or
> -// TRN instruction.
> -static unsigned isPermuteMask(ArrayRef<int> M, EVT VT, bool isV2undef) {
> - unsigned NumElts = VT.getVectorNumElements();
> - if (NumElts < 4)
> - return 0;
> -
> - bool ismatch = true;
> -
> - // Check UZP1
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i * 2;
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_UZP1;
> -
> - // Check UZP2
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i * 2 + 1;
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_UZP2;
> -
> - // Check ZIP1
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i / 2 + NumElts * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_ZIP1;
> -
> - // Check ZIP2
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = (NumElts + i) / 2 + NumElts * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_ZIP2;
> -
> - // Check TRN1
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = i + (NumElts - 1) * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_TRN1;
> -
> - // Check TRN2
> - ismatch = true;
> - for (unsigned i = 0; i < NumElts; ++i) {
> - unsigned answer = 1 + i + (NumElts - 1) * (i % 2);
> - if (isV2undef && answer >= NumElts)
> - answer -= NumElts;
> - if (M[i] != -1 && (unsigned)M[i] != answer) {
> - ismatch = false;
> - break;
> - }
> - }
> - if (ismatch)
> - return AArch64ISD::NEON_TRN2;
> -
> - return 0;
> -}
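For a v4i16 shuffle (NumElts == 4, isV2undef == false) the six formulas
above produce the canonical two-operand permute masks. Listing them makes
the checks easier to follow (worked out by hand from the code, so treat as
illustrative):

  // UZP1: {0, 2, 4, 6}   answer = i * 2
  // UZP2: {1, 3, 5, 7}   answer = i * 2 + 1
  // ZIP1: {0, 4, 1, 5}   answer = i / 2 + 4 * (i % 2)
  // ZIP2: {2, 6, 3, 7}   answer = (4 + i) / 2 + 4 * (i % 2)
  // TRN1: {0, 4, 2, 6}   answer = i + 3 * (i % 2)
  // TRN2: {1, 5, 3, 7}   answer = 1 + i + 3 * (i % 2)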
> -
> -SDValue
> -AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
> - SelectionDAG &DAG) const {
> - SDValue V1 = Op.getOperand(0);
> - SDValue V2 = Op.getOperand(1);
> - SDLoc dl(Op);
> - EVT VT = Op.getValueType();
> - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
> -
> - // Convert shuffles that are directly supported on NEON to target-specific
> - // DAG nodes, instead of keeping them as shuffles and matching them again
> - // during code selection. This is more efficient and avoids the possibility
> - // of inconsistencies between legalization and selection.
> - ArrayRef<int> ShuffleMask = SVN->getMask();
> -
> - unsigned EltSize = VT.getVectorElementType().getSizeInBits();
> - if (EltSize > 64)
> - return SDValue();
> -
> - if (isREVMask(ShuffleMask, VT, 64))
> - return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
> - if (isREVMask(ShuffleMask, VT, 32))
> - return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
> - if (isREVMask(ShuffleMask, VT, 16))
> - return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
> -
> - unsigned ISDNo;
> - if (V2.getOpcode() == ISD::UNDEF)
> - ISDNo = isPermuteMask(ShuffleMask, VT, true);
> - else
> - ISDNo = isPermuteMask(ShuffleMask, VT, false);
> -
> - if (ISDNo) {
> - if (V2.getOpcode() == ISD::UNDEF)
> - return DAG.getNode(ISDNo, dl, VT, V1, V1);
> - else
> - return DAG.getNode(ISDNo, dl, VT, V1, V2);
> - }
> -
> - SDValue Res;
> - if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
> - return Res;
> -
> - // If the elements of the shuffle mask are all the same constant, we can
> - // transform it into either NEON_VDUP or NEON_VDUPLANE.
> - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
> - int Lane = SVN->getSplatIndex();
> - // If this is undef splat, generate it via "just" vdup, if possible.
> - if (Lane == -1) Lane = 0;
> -
> - // Test if V1 is a SCALAR_TO_VECTOR.
> - if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
> - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
> - }
> - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
> - if (V1.getOpcode() == ISD::BUILD_VECTOR) {
> - bool IsScalarToVector = true;
> - for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
> - if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
> - i != (unsigned)Lane) {
> - IsScalarToVector = false;
> - break;
> - }
> - if (IsScalarToVector)
> - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
> - V1.getOperand(Lane));
> - }
> -
> - // Test if V1 is a EXTRACT_SUBVECTOR.
> - if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
> - int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
> - DAG.getConstant(Lane + ExtLane, MVT::i64));
> - }
> - // Test if V1 is a CONCAT_VECTORS.
> - if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
> - V1.getOperand(1).getOpcode() == ISD::UNDEF) {
> - SDValue Op0 = V1.getOperand(0);
> - assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
> - "Invalid vector lane access");
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
> - DAG.getConstant(Lane, MVT::i64));
> - }
> -
> - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
> - DAG.getConstant(Lane, MVT::i64));
> - }
> -
> - int Length = ShuffleMask.size();
> - int V1EltNum = V1.getValueType().getVectorNumElements();
> -
> - // If the number of V1 elements is the same as the number of shuffle mask
> - // elements and the shuffle mask values are sequential, we can transform
> - // it into NEON_VEXTRACT.
> - if (V1EltNum == Length) {
> - // Check if the shuffle mask is sequential.
> - int SkipUndef = 0;
> - while (ShuffleMask[SkipUndef] == -1) {
> - SkipUndef++;
> - }
> - int CurMask = ShuffleMask[SkipUndef];
> - if (CurMask >= SkipUndef) {
> - bool IsSequential = true;
> - for (int I = SkipUndef; I < Length; ++I) {
> - if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) {
> - IsSequential = false;
> - break;
> - }
> - CurMask++;
> - }
> - if (IsSequential) {
> - assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
> - unsigned VecSize = EltSize * V1EltNum;
> - unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef);
> - if (VecSize == 64 || VecSize == 128)
> - return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
> - DAG.getConstant(Index, MVT::i64));
> - }
> - }
> - }
> -
> - // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
> - // insert by element from V2 into V1.
> - // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is a
> - // better insert target than V1 because fewer inserts are needed, so we
> - // count the elements to be inserted for both V1 and V2 and pick the one
> - // that needs fewer.
> -
> - // Collect elements need to be inserted and their index.
> - SmallVector<int, 8> NV1Elt;
> - SmallVector<int, 8> N1Index;
> - SmallVector<int, 8> NV2Elt;
> - SmallVector<int, 8> N2Index;
> - for (int I = 0; I != Length; ++I) {
> - if (ShuffleMask[I] != I) {
> - NV1Elt.push_back(ShuffleMask[I]);
> - N1Index.push_back(I);
> - }
> - }
> - for (int I = 0; I != Length; ++I) {
> - if (ShuffleMask[I] != (I + V1EltNum)) {
> - NV2Elt.push_back(ShuffleMask[I]);
> - N2Index.push_back(I);
> - }
> - }
> -
> - // Decide which vector to insert into. If all lanes mismatch for both,
> - // neither V1 nor V2 is used as the base and we start from UNDEF.
> - SDValue InsV = V1;
> - SmallVector<int, 8> InsMasks = NV1Elt;
> - SmallVector<int, 8> InsIndex = N1Index;
> - if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
> - if (NV1Elt.size() > NV2Elt.size()) {
> - InsV = V2;
> - InsMasks = NV2Elt;
> - InsIndex = N2Index;
> - }
> - } else {
> - InsV = DAG.getNode(ISD::UNDEF, dl, VT);
> - }
> -
> - for (int I = 0, E = InsMasks.size(); I != E; ++I) {
> - SDValue ExtV = V1;
> - int Mask = InsMasks[I];
> - if (Mask >= V1EltNum) {
> - ExtV = V2;
> - Mask -= V1EltNum;
> - }
> - // Any value type smaller than i32 is illegal in AArch64, and this lowering
> - // function is called after the legalize pass, so we need to legalize
> - // the result here.
> - EVT EltVT;
> - if (VT.getVectorElementType().isFloatingPoint())
> - EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
> - else
> - EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
> -
> - if (Mask >= 0) {
> - ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
> - DAG.getConstant(Mask, MVT::i64));
> - InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
> - DAG.getConstant(InsIndex[I], MVT::i64));
> - }
> - }
> - return InsV;
> -}
> -
> -AArch64TargetLowering::ConstraintType
> -AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
> - if (Constraint.size() == 1) {
> - switch (Constraint[0]) {
> - default: break;
> - case 'w': // An FP/SIMD vector register
> - return C_RegisterClass;
> - case 'I': // Constant that can be used with an ADD instruction
> - case 'J': // Constant that can be used with a SUB instruction
> - case 'K': // Constant that can be used with a 32-bit logical instruction
> - case 'L': // Constant that can be used with a 64-bit logical instruction
> - case 'M': // Constant that can be used as a 32-bit MOV immediate
> - case 'N': // Constant that can be used as a 64-bit MOV immediate
> - case 'Y': // Floating point constant zero
> - case 'Z': // Integer constant zero
> - return C_Other;
> - case 'Q': // A memory reference with base register and no offset
> - return C_Memory;
> - case 'S': // A symbolic address
> - return C_Other;
> - }
> - }
> -
> - // FIXME: Ump, Utf, Usa, Ush
> - // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
> - // whatever they may be
> - // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
> - // Usa: An absolute symbolic address
> - // Ush: The high part (bits 32:12) of a pc-relative symbolic address
> - assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
> - && Constraint != "Ush" && "Unimplemented constraints");
> -
> - return TargetLowering::getConstraintType(Constraint);
> -}
> -
> -TargetLowering::ConstraintWeight
> -AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
> - const char *Constraint) const {
> -
> - llvm_unreachable("Constraint weight unimplemented");
> -}
> -
> -void
> -AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
> - std::string &Constraint,
> - std::vector<SDValue> &Ops,
> - SelectionDAG &DAG) const {
> - SDValue Result;
> -
> - // Only length 1 constraints are C_Other.
> - if (Constraint.size() != 1) return;
> -
> - // Only C_Other constraints get lowered like this. That means constants for
> - // us, so return early if there's no hope the constraint can be lowered.
> -
> - switch(Constraint[0]) {
> - default: break;
> - case 'I': case 'J': case 'K': case 'L':
> - case 'M': case 'N': case 'Z': {
> - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
> - if (!C)
> - return;
> -
> - uint64_t CVal = C->getZExtValue();
> - uint32_t Bits;
> -
> - switch (Constraint[0]) {
> - default:
> - // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
> - // is a peculiarly useless SUB constraint.
> - llvm_unreachable("Unimplemented C_Other constraint");
> - case 'I':
> - if (CVal <= 0xfff)
> - break;
> - return;
> - case 'K':
> - if (A64Imms::isLogicalImm(32, CVal, Bits))
> - break;
> - return;
> - case 'L':
> - if (A64Imms::isLogicalImm(64, CVal, Bits))
> - break;
> - return;
> - case 'Z':
> - if (CVal == 0)
> - break;
> - return;
> - }
> -
> - Result = DAG.getTargetConstant(CVal, Op.getValueType());
> - break;
> - }
> - case 'S': {
> - // An absolute symbolic address or label reference.
> - if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
> - Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
> - GA->getValueType(0));
> - } else if (const BlockAddressSDNode *BA
> - = dyn_cast<BlockAddressSDNode>(Op)) {
> - Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
> - BA->getValueType(0));
> - } else if (const ExternalSymbolSDNode *ES
> - = dyn_cast<ExternalSymbolSDNode>(Op)) {
> - Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
> - ES->getValueType(0));
> - } else
> - return;
> - break;
> - }
> - case 'Y':
> - if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
> - if (CFP->isExactlyValue(0.0)) {
> - Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
> - break;
> - }
> - }
> - return;
> - }
> -
> - if (Result.getNode()) {
> - Ops.push_back(Result);
> - return;
> - }
> -
> - // It's an unknown constraint for us. Let generic code have a go.
> - TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
> -}
> -
> -std::pair<unsigned, const TargetRegisterClass*>
> -AArch64TargetLowering::getRegForInlineAsmConstraint(
> - const std::string &Constraint,
> - MVT VT) const {
> - if (Constraint.size() == 1) {
> - switch (Constraint[0]) {
> - case 'r':
> - if (VT.getSizeInBits() <= 32)
> - return std::make_pair(0U, &AArch64::GPR32RegClass);
> - else if (VT == MVT::i64)
> - return std::make_pair(0U, &AArch64::GPR64RegClass);
> - break;
> - case 'w':
> - if (VT == MVT::f16)
> - return std::make_pair(0U, &AArch64::FPR16RegClass);
> - else if (VT == MVT::f32)
> - return std::make_pair(0U, &AArch64::FPR32RegClass);
> - else if (VT.getSizeInBits() == 64)
> - return std::make_pair(0U, &AArch64::FPR64RegClass);
> - else if (VT.getSizeInBits() == 128)
> - return std::make_pair(0U, &AArch64::FPR128RegClass);
> - break;
> - }
> - }
> -
> - // Use the default implementation in TargetLowering to convert the register
> - // constraint into a member of a register class.
> - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
> -}
> -
> -/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
> -/// The associated MachineMemOperands record the alignment specified
> -/// in the intrinsic calls.
> -bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
> - const CallInst &I,
> - unsigned Intrinsic) const {
> - switch (Intrinsic) {
> - case Intrinsic::arm_neon_vld1:
> - case Intrinsic::arm_neon_vld2:
> - case Intrinsic::arm_neon_vld3:
> - case Intrinsic::arm_neon_vld4:
> - case Intrinsic::aarch64_neon_vld1x2:
> - case Intrinsic::aarch64_neon_vld1x3:
> - case Intrinsic::aarch64_neon_vld1x4:
> - case Intrinsic::arm_neon_vld2lane:
> - case Intrinsic::arm_neon_vld3lane:
> - case Intrinsic::arm_neon_vld4lane: {
> - Info.opc = ISD::INTRINSIC_W_CHAIN;
> - // Conservatively set memVT to the entire set of vectors loaded.
> - uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
> - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
> - Info.ptrVal = I.getArgOperand(0);
> - Info.offset = 0;
> - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
> - Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
> - Info.vol = false; // volatile loads with NEON intrinsics not supported
> - Info.readMem = true;
> - Info.writeMem = false;
> - return true;
> - }
> - case Intrinsic::arm_neon_vst1:
> - case Intrinsic::arm_neon_vst2:
> - case Intrinsic::arm_neon_vst3:
> - case Intrinsic::arm_neon_vst4:
> - case Intrinsic::aarch64_neon_vst1x2:
> - case Intrinsic::aarch64_neon_vst1x3:
> - case Intrinsic::aarch64_neon_vst1x4:
> - case Intrinsic::arm_neon_vst2lane:
> - case Intrinsic::arm_neon_vst3lane:
> - case Intrinsic::arm_neon_vst4lane: {
> - Info.opc = ISD::INTRINSIC_VOID;
> - // Conservatively set memVT to the entire set of vectors stored.
> - unsigned NumElts = 0;
> - for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
> - Type *ArgTy = I.getArgOperand(ArgI)->getType();
> - if (!ArgTy->isVectorTy())
> - break;
> - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
> - }
> - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
> - Info.ptrVal = I.getArgOperand(0);
> - Info.offset = 0;
> - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
> - Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
> - Info.vol = false; // volatile stores with NEON intrinsics not supported
> - Info.readMem = false;
> - Info.writeMem = true;
> - return true;
> - }
> - default:
> - break;
> - }
> -
> - return false;
> -}
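A worked example of the conservative memVT above (arithmetic only,
illustrative): an arm_neon_vld3 returning three <4 x i32> vectors has an
aggregate alloc size of 48 bytes, so:

  // TypeAllocSize = 3 * 16 bytes = 48
  // NumElts       = 48 / 8       = 6
  // memVT         = v6i64, spanning every vector loaded by the intrinsic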
> -
> -// Truncations from a 64-bit GPR to a 32-bit GPR are free.
> -bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
> - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
> - return false;
> - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
> - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
> - if (NumBits1 <= NumBits2)
> - return false;
> - return true;
> -}
> -
> -bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
> - if (!VT1.isInteger() || !VT2.isInteger())
> - return false;
> - unsigned NumBits1 = VT1.getSizeInBits();
> - unsigned NumBits2 = VT2.getSizeInBits();
> - if (NumBits1 <= NumBits2)
> - return false;
> - return true;
> -}
> -
> -// All 32-bit GPR operations implicitly zero the high-half of the corresponding
> -// 64-bit GPR.
> -bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
> - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
> - return false;
> - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
> - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
> - if (NumBits1 == 32 && NumBits2 == 64)
> - return true;
> - return false;
> -}
> -
> -bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
> - if (!VT1.isInteger() || !VT2.isInteger())
> - return false;
> - unsigned NumBits1 = VT1.getSizeInBits();
> - unsigned NumBits2 = VT2.getSizeInBits();
> - if (NumBits1 == 32 && NumBits2 == 64)
> - return true;
> - return false;
> -}
> -
> -bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
> - EVT VT1 = Val.getValueType();
> - if (isZExtFree(VT1, VT2)) {
> - return true;
> - }
> -
> - if (Val.getOpcode() != ISD::LOAD)
> - return false;
> -
> - // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
> - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
> - VT2.isInteger() && VT1.getSizeInBits() <= 32);
> -}
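The architectural reason zext is free: any write to a W register zeroes
bits 63:32 of the corresponding X register, and the narrow load forms
zero-extend implicitly. A hedged sketch of a pattern that needs no extra
instruction (function name made up):

  #include <cstdint>

  // Compiles to a bare 32-bit load; the i32 -> i64 zext costs nothing.
  uint64_t widen_load(const uint32_t *p) { return *p; }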
> -
> -/// isLegalAddressingMode - Return true if the addressing mode represented
> -/// by AM is legal for this target, for a load/store of the specified type.
> -bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
> - Type *Ty) const {
> - // AArch64 has five basic addressing modes:
> - // reg
> - // reg + 9-bit signed offset
> - // reg + SIZE_IN_BYTES * 12-bit unsigned offset
> - // reg1 + reg2
> - // reg + SIZE_IN_BYTES * reg
> -
> - // No global is ever allowed as a base.
> - if (AM.BaseGV)
> - return false;
> -
> - // No reg+reg+imm addressing.
> - if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
> - return false;
> -
> - // check reg + imm case:
> - // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
> - uint64_t NumBytes = 0;
> - if (Ty->isSized()) {
> - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
> - NumBytes = NumBits / 8;
> - if (!isPowerOf2_64(NumBits))
> - NumBytes = 0;
> - }
> -
> - if (!AM.Scale) {
> - int64_t Offset = AM.BaseOffs;
> -
> - // 9-bit signed offset
> - if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
> - return true;
> -
> - // 12-bit unsigned offset
> - unsigned shift = Log2_64(NumBytes);
> - if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
> - // Must be a multiple of NumBytes (NumBytes is a power of 2)
> - (Offset >> shift) << shift == Offset)
> - return true;
> - return false;
> - }
> - if (!AM.Scale || AM.Scale == 1 ||
> - (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes))
> - return true;
> - return false;
> -}
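The reg + imm half of this predicate is self-contained enough to restate
on its own. A standalone sketch that mirrors the check above (helper name
made up; NumBytes is the power-of-two access size, or 0 when unknown):

  #include <cstdint>

  bool isLegalRegImmOffset(int64_t Offset, uint64_t NumBytes) {
    // 9-bit signed offset.
    if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
      return true;
    // Scaled 12-bit unsigned offset: a positive multiple of the access
    // size, at most (2^12 - 1) * NumBytes.
    return NumBytes && Offset > 0 &&
           Offset % (int64_t)NumBytes == 0 &&
           Offset / (int64_t)NumBytes <= (1LL << 12) - 1;
  }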
> -
> -int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
> - Type *Ty) const {
> - // Scaling factors are not free at all.
> - // Operands | Rt Latency
> - // -------------------------------------------
> - // Rt, [Xn, Xm] | 4
> - // -------------------------------------------
> - // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
> - // Rt, [Xn, Wm, <extend> #imm] |
> - if (isLegalAddressingMode(AM, Ty))
> - // Scale represents reg2 * scale; charge a cost of 1 if it is
> - // neither 0 nor 1.
> - return AM.Scale != 0 && AM.Scale != 1;
> - return -1;
> -}
> -
> -/// getMaximalGlobalOffset - Returns the maximal possible offset which can
> -/// be used for loads / stores from the global.
> -unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
> - return 4095;
> -}
> -
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (removed)
> @@ -1,410 +0,0 @@
> -//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file defines the interfaces that AArch64 uses to lower LLVM code into a
> -// selection DAG.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
> -#define LLVM_TARGET_AARCH64_ISELLOWERING_H
> -
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/CodeGen/CallingConvLower.h"
> -#include "llvm/CodeGen/SelectionDAG.h"
> -#include "llvm/IR/Intrinsics.h"
> -#include "llvm/Target/TargetLowering.h"
> -
> -namespace llvm {
> -namespace AArch64ISD {
> - enum NodeType {
> - // Start the numbering from where ISD NodeType finishes.
> - FIRST_NUMBER = ISD::BUILTIN_OP_END,
> -
> - // This is a conditional branch which also notes the flag needed
> - // (eq/sgt/...). A64 puts this information on the branches rather than
> - // compares as LLVM does.
> - BR_CC,
> -
> - // A node to be selected to an actual call operation: either BL or BLR in
> - // the absence of tail calls.
> - Call,
> -
> - // Indicates a floating-point immediate which fits into the format required
> - // by the FMOV instructions. First (and only) operand is the 8-bit encoded
> - // value of that immediate.
> - FPMOV,
> -
> - // Corresponds directly to an EXTR instruction. Operands are an LHS, an
> - // RHS, and an LSB.
> - EXTR,
> -
> - // Wraps a load from the GOT, which should always be performed with a 64-bit
> - // load instruction. This prevents the DAG combiner folding a truncate to
> - // form a smaller memory access.
> - GOTLoad,
> -
> - // Performs a bitfield insert. Arguments are: the value being inserted into;
> - // the value being inserted; least significant bit changed; width of the
> - // field.
> - BFI,
> -
> - // Simply a convenient node inserted during ISelLowering to represent
> - // procedure return. Will almost certainly be selected to "RET".
> - Ret,
> -
> - /// Extracts a field of contiguous bits from the source and sign extends
> - /// them into a single register. Arguments are: source; immr; imms. Note
> - /// these are pre-encoded since DAG matching can't cope with combining LSB
> - /// and Width into these values itself.
> - SBFX,
> -
> - /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
> - /// main difference is that it only has the values and an A64 condition,
> - /// which will be produced by a setcc instruction.
> - SELECT_CC,
> -
> - /// This serves most of the functions of the LLVM SETCC instruction, for two
> - /// purposes. First, it prevents optimisations from fiddling with the
> - /// compare after we've moved the CondCode information onto the SELECT_CC or
> - /// BR_CC instructions. Second, it gives a legal instruction for the actual
> - /// comparison.
> - ///
> - /// It keeps a record of the condition flags asked for because certain
> - /// instructions are only valid for a subset of condition codes.
> - SETCC,
> -
> - // Designates a node which is a tail call: both a call and a return
> - // instruction as far as selection is concerned. It should be selected to an
> - // unconditional branch. Has the usual plethora of call operands, but: 1st
> - // is callee, 2nd is stack adjustment required immediately before branch.
> - TC_RETURN,
> -
> - // Designates a call used to support the TLS descriptor ABI. The call itself
> - // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
> - // var") must be attached somehow during code generation. It takes two
> - // operands: the callee and the symbol to be relocated against.
> - TLSDESCCALL,
> -
> - // Leaf node which will be lowered to an appropriate MRS to obtain the
> - // thread pointer: TPIDR_EL0.
> - THREAD_POINTER,
> -
> - /// Extracts a field of contiguous bits from the source and zero extends
> - /// them into a single register. Arguments are: source; immr; imms. Note
> - /// these are pre-encoded since DAG matching can't cope with combining LSB
> - /// and Width into these values itself.
> - UBFX,
> -
> - // Wraps an address which the ISelLowering phase has decided should be
> - // created using the large memory model style: i.e. a sequence of four
> - // movz/movk instructions.
> - WrapperLarge,
> -
> - // Wraps an address which the ISelLowering phase has decided should be
> - // created using the small memory model style: i.e. adrp/add or
> - // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
> - // get selected.
> - WrapperSmall,
> -
> - // Vector move immediate
> - NEON_MOVIMM,
> -
> - // Vector Move Inverted Immediate
> - NEON_MVNIMM,
> -
> - // Vector FP move immediate
> - NEON_FMOVIMM,
> -
> - // Vector permute
> - NEON_UZP1,
> - NEON_UZP2,
> - NEON_ZIP1,
> - NEON_ZIP2,
> - NEON_TRN1,
> - NEON_TRN2,
> -
> - // Vector Element reverse
> - NEON_REV64,
> - NEON_REV32,
> - NEON_REV16,
> -
> - // Vector compare
> - NEON_CMP,
> -
> - // Vector compare zero
> - NEON_CMPZ,
> -
> - // Vector compare bitwise test
> - NEON_TST,
> -
> - // Vector saturating shift
> - NEON_QSHLs,
> - NEON_QSHLu,
> -
> - // Vector dup
> - NEON_VDUP,
> -
> - // Vector dup by lane
> - NEON_VDUPLANE,
> -
> - // Vector extract
> - NEON_VEXTRACT,
> -
> - // NEON duplicate lane loads
> - NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
> - NEON_LD3DUP,
> - NEON_LD4DUP,
> -
> - // NEON loads with post-increment base updates:
> - NEON_LD1_UPD,
> - NEON_LD2_UPD,
> - NEON_LD3_UPD,
> - NEON_LD4_UPD,
> - NEON_LD1x2_UPD,
> - NEON_LD1x3_UPD,
> - NEON_LD1x4_UPD,
> -
> - // NEON stores with post-increment base updates:
> - NEON_ST1_UPD,
> - NEON_ST2_UPD,
> - NEON_ST3_UPD,
> - NEON_ST4_UPD,
> - NEON_ST1x2_UPD,
> - NEON_ST1x3_UPD,
> - NEON_ST1x4_UPD,
> -
> - // NEON duplicate lane loads with post-increment base updates:
> - NEON_LD2DUP_UPD,
> - NEON_LD3DUP_UPD,
> - NEON_LD4DUP_UPD,
> -
> - // NEON lane loads with post-increment base updates:
> - NEON_LD2LN_UPD,
> - NEON_LD3LN_UPD,
> - NEON_LD4LN_UPD,
> -
> - // NEON lane store with post-increment base updates:
> - NEON_ST2LN_UPD,
> - NEON_ST3LN_UPD,
> - NEON_ST4LN_UPD
> - };
> -}
> -
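
A note on the pre-encoded immr/imms mentioned for SBFX/UBFX above: they
follow the usual SBFM/UBFM field mapping, so the conversion the matcher
cannot do for itself is just the one below. A minimal sketch in C++; the
helper and its names are mine, not code from this tree:

    #include <cassert>

    struct BitfieldImms { unsigned ImmR, ImmS; };

    // Map an (lsb, width) field extract onto the immr/imms operands
    // that UBFX/SBFX, as aliases of UBFM/SBFM, expect.
    BitfieldImms encodeBFX(unsigned LSB, unsigned Width, unsigned RegSize) {
      assert(Width >= 1 && LSB + Width <= RegSize);
      return { LSB, LSB + Width - 1 }; // immr = lsb, imms = msb of field
    }

E.g. a 64-bit "ubfx x0, x1, #8, #4" carries immr=8, imms=11.
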
> -
> -class AArch64Subtarget;
> -class AArch64TargetMachine;
> -
> -class AArch64TargetLowering : public TargetLowering {
> -public:
> - explicit AArch64TargetLowering(AArch64TargetMachine &TM);
> -
> - const char *getTargetNodeName(unsigned Opcode) const override;
> -
> - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
> -
> - SDValue LowerFormalArguments(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const override;
> -
> - SDValue LowerReturn(SDValue Chain,
> - CallingConv::ID CallConv, bool isVarArg,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - SDLoc dl, SelectionDAG &DAG) const override;
> -
> - unsigned getByValTypeAlignment(Type *Ty) const override;
> -
> - SDValue LowerCall(CallLoweringInfo &CLI,
> - SmallVectorImpl<SDValue> &InVals) const override;
> -
> - SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
> - CallingConv::ID CallConv, bool IsVarArg,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SDLoc dl, SelectionDAG &DAG,
> - SmallVectorImpl<SDValue> &InVals) const;
> -
> - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
> -
> - bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1,
> - const int *Mask, SDValue &Res) const;
> -
> - bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
> - SDValue &V1, int *Mask) const;
> -
> - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
> - const AArch64Subtarget *ST) const;
> -
> - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
> -
> - void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
> - SDValue &Chain) const;
> -
> - /// IsEligibleForTailCallOptimization - Check whether the call is eligible
> - /// for tail call optimization. Targets which want to do tail call
> - /// optimization should implement this function.
> - bool IsEligibleForTailCallOptimization(SDValue Callee,
> - CallingConv::ID CalleeCC,
> - bool IsVarArg,
> - bool IsCalleeStructRet,
> - bool IsCallerStructRet,
> - const SmallVectorImpl<ISD::OutputArg> &Outs,
> - const SmallVectorImpl<SDValue> &OutVals,
> - const SmallVectorImpl<ISD::InputArg> &Ins,
> - SelectionDAG& DAG) const;
> -
> - /// Finds the incoming stack arguments which overlap the given fixed stack
> - /// object and incorporates their load into the current chain. This prevents
> - /// an upcoming store from clobbering the stack argument before it's used.
> - SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
> - MachineFrameInfo *MFI, int ClobberedFI) const;
> -
> - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
> -
> - bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
> -
> - bool IsTailCallConvention(CallingConv::ID CallCC) const;
> -
> - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
> -
> - bool isLegalICmpImmediate(int64_t Val) const override;
> -
> - /// \brief Return true if the addressing mode represented by AM is legal for
> - /// this target, for a load/store of the specified type.
> - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
> -
> - /// \brief Return the cost of the scaling factor used in the addressing
> - /// mode represented by AM for this target, for a load/store
> - /// of the specified type.
> - /// If the AM is supported, the return value must be >= 0.
> - /// If the AM is not supported, it returns a negative value.
> - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
> -
> - bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
> - bool isTruncateFree(EVT VT1, EVT VT2) const override;
> -
> - bool isZExtFree(Type *Ty1, Type *Ty2) const override;
> - bool isZExtFree(EVT VT1, EVT VT2) const override;
> - bool isZExtFree(SDValue Val, EVT VT2) const override;
> -
> - SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
> - SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const;
> -
> - MachineBasicBlock *
> - EmitInstrWithCustomInserter(MachineInstr *MI,
> - MachineBasicBlock *MBB) const override;
> -
> - MachineBasicBlock *
> - emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
> - unsigned Size, unsigned Opcode) const;
> -
> - MachineBasicBlock *
> - emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned Size, unsigned CmpOp,
> - A64CC::CondCodes Cond) const;
> - MachineBasicBlock *
> - emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
> - unsigned Size) const;
> -
> - MachineBasicBlock *
> - EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
> -
> - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
> - RTLIB::Libcall Call) const;
> - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
> - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
> - SelectionDAG &DAG) const;
> - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
> - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
> -
> - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
> -
> - unsigned getRegisterByName(const char* RegName, EVT VT) const override;
> -
> - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
> - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
> - /// expanded to FMAs when this method returns true, otherwise fmuladd is
> - /// expanded to fmul + fadd.
> - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
> -
> - /// allowsUnalignedMemoryAccesses - Returns true if the target allows
> - /// unaligned memory accesses of the specified type. Returns whether it
> - /// is "fast" by reference in the second argument.
> - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
> - bool *Fast) const override;
> -
> - ConstraintType
> - getConstraintType(const std::string &Constraint) const override;
> -
> - ConstraintWeight
> - getSingleConstraintMatchWeight(AsmOperandInfo &Info,
> - const char *Constraint) const override;
> - void LowerAsmOperandForConstraint(SDValue Op,
> - std::string &Constraint,
> - std::vector<SDValue> &Ops,
> - SelectionDAG &DAG) const override;
> -
> - std::pair<unsigned, const TargetRegisterClass*>
> - getRegForInlineAsmConstraint(const std::string &Constraint,
> - MVT VT) const override;
> -
> - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
> - unsigned Intrinsic) const override;
> -
> - /// getMaximalGlobalOffset - Returns the maximal possible offset which can
> - /// be used for loads / stores from the global.
> - unsigned getMaximalGlobalOffset() const override;
> -
> -protected:
> - std::pair<const TargetRegisterClass*, uint8_t>
> - findRepresentativeClass(MVT VT) const override;
> -
> -private:
> - const InstrItineraryData *Itins;
> -
> - const AArch64Subtarget *getSubtarget() const {
> - return &getTargetMachine().getSubtarget<AArch64Subtarget>();
> - }
> -};
> -enum NeonModImmType {
> - Neon_Mov_Imm,
> - Neon_Mvn_Imm
> -};
> -
> -extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
> - bool &usesOnlyOneValue, bool &hasDominantValue,
> - bool &isConstant, bool &isUNDEF);
> -} // namespace llvm
> -
> -#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td (removed)
> @@ -1,1487 +0,0 @@
> -//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -// This file describes AArch64 instruction formats, down to the level of the
> -// instruction's overall class.
> -//===----------------------------------------------------------------------===//
> -
> -
> -//===----------------------------------------------------------------------===//
> -// A64 Instruction Format Definitions.
> -//===----------------------------------------------------------------------===//
> -
> -// A64 is currently the only instruction set supported by the AArch64
> -// architecture.
> -class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : Instruction {
> - // All A64 instructions are 32-bit. This field will be filled in
> - // gradually going down the hierarchy.
> - field bits<32> Inst;
> -
> - field bits<32> Unpredictable = 0;
> - // SoftFail is the generic name for this field, but we alias it so
> - // as to make it more obvious what it means in ARM-land.
> - field bits<32> SoftFail = Unpredictable;
> -
> - // LLVM-level model of the AArch64/A64 distinction.
> - let Namespace = "AArch64";
> - let DecoderNamespace = "A64";
> - let Size = 4;
> -
> - // Set the templated fields
> - let OutOperandList = outs;
> - let InOperandList = ins;
> - let AsmString = asmstr;
> - let Pattern = patterns;
> - let Itinerary = itin;
> -}
> -
> -class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
> - let Namespace = "AArch64";
> -
> - let OutOperandList = outs;
> - let InOperandList = ins;
> - let Pattern = patterns;
> - let isCodeGenOnly = 1;
> - let isPseudo = 1;
> -}
> -
> -// Represents a pseudo-instruction that, for whatever reason, stands for a
> -// single A64 instruction; the eventual result will be a 32-bit real
> -// instruction.
> -class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
> - : PseudoInst<outs, ins, patterns> {
> - let Size = 4;
> -}
> -
> -// As above, this will be a single A64 instruction, but we can actually give the
> -// expansion in TableGen.
> -class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
> - : A64PseudoInst<outs, ins, patterns>,
> - PseudoInstExpansion<Result>;
> -
> -
> -// First, some common cross-hierarchy register formats.
> -
> -class A64InstRd<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rd;
> -
> - let Inst{4-0} = Rd;
> -}
> -
> -class A64InstRt<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt;
> -
> - let Inst{4-0} = Rt;
> -}
> -
> -
> -class A64InstRdn<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - // Inherit Rd
> - bits<5> Rn;
> -
> - let Inst{9-5} = Rn;
> -}
> -
> -class A64InstRtn<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - // Inherit Rt
> - bits<5> Rn;
> -
> - let Inst{9-5} = Rn;
> -}
> -
> -// Instructions taking Rt,Rt2,Rn
> -class A64InstRtt2n<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt2;
> -
> - let Inst{14-10} = Rt2;
> -}
> -
> -class A64InstRdnm<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rm;
> -
> - let Inst{20-16} = Rm;
> -}
> -
> -class A64InstRtnm<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rm;
> -
> - let Inst{20-16} = Rm;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// Actual A64 Instruction Formats
> -//
> -
> -// Format for Add-subtract (extended register) instructions.
> -class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<3> Imm3;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-24} = 0b01011;
> - let Inst{23-22} = opt;
> - let Inst{21} = 0b1;
> - // Rm inherited in 20-16
> - let Inst{15-13} = option;
> - let Inst{12-10} = Imm3;
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
> -
> -// Format for Add-subtract (immediate) instructions.
> -class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<12> Imm12;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-24} = 0b10001;
> - let Inst{23-22} = shift;
> - let Inst{21-10} = Imm12;
> -}
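
For context on the shift field above: an add/sub immediate is legal
exactly when it is a 12-bit value, optionally shifted left by 12. A quick
sketch of that test, my own illustration rather than code from the
backend:

    #include <cstdint>

    // Legal iff the value fits in imm12 or in imm12 << 12.
    bool isLegalAddSubImm(uint64_t V) {
      return (V & ~0xfffULL) == 0 || (V & ~0xfff000ULL) == 0;
    }

Anything else has to be materialised into a register first.
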
> -
> -// Format for Add-subtract (shifted register) instructions.
> -class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<6> Imm6;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-24} = 0b01011;
> - let Inst{23-22} = shift;
> - let Inst{21} = 0b0;
> - // Rm inherited in 20-16
> - let Inst{15-10} = Imm6;
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
> -
> -// Format for Add-subtract (with carry) instructions.
> -class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = S;
> - let Inst{28-21} = 0b11010000;
> - // Rm inherited in 20-16
> - let Inst{15-10} = opcode2;
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
> -
> -
> -// Format for Bitfield instructions
> -class A64I_bitfield<bit sf, bits<2> opc, bit n,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<6> ImmR;
> - bits<6> ImmS;
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-23} = 0b100110;
> - let Inst{22} = n;
> - let Inst{21-16} = ImmR;
> - let Inst{15-10} = ImmS;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for compare and branch (immediate) instructions.
> -class A64I_cmpbr<bit sf, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - bits<19> Label;
> -
> - let Inst{31} = sf;
> - let Inst{30-25} = 0b011010;
> - let Inst{24} = op;
> - let Inst{23-5} = Label;
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for conditional branch (immediate) instructions.
> -class A64I_condbr<bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<19> Label;
> - bits<4> Cond;
> -
> - let Inst{31-25} = 0b0101010;
> - let Inst{24} = o1;
> - let Inst{23-5} = Label;
> - let Inst{4} = o0;
> - let Inst{3-0} = Cond;
> -}
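
Both classes above carry their 19-bit label in Inst{23-5}, scaled by the
4-byte instruction size. A sketch of how a disassembler would recover the
byte offset; my reconstruction, not code from this tree:

    #include <cstdint>

    int64_t decodeBranch19(uint32_t Inst) {
      uint64_t Raw = (Inst >> 5) & 0x7ffff;     // Label, Inst{23-5}
      return ((int64_t)(Raw << 45) >> 45) * 4;  // sign-extend, words -> bytes
    }

The same recipe applies to CBZ/CBNZ and B.cond alike.
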
> -
> -// Format for conditional compare (immediate) instructions.
> -class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> UImm5;
> - bits<4> NZCVImm;
> - bits<4> Cond;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = s;
> - let Inst{28-21} = 0b11010010;
> - let Inst{20-16} = UImm5;
> - let Inst{15-12} = Cond;
> - let Inst{11} = 0b1;
> - let Inst{10} = o2;
> - let Inst{9-5} = Rn;
> - let Inst{4} = o3;
> - let Inst{3-0} = NZCVImm;
> -}
> -
> -// Format for conditional compare (register) instructions.
> -class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> Rm;
> - bits<4> NZCVImm;
> - bits<4> Cond;
> -
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = s;
> - let Inst{28-21} = 0b11010010;
> - let Inst{20-16} = Rm;
> - let Inst{15-12} = Cond;
> - let Inst{11} = 0b0;
> - let Inst{10} = o2;
> - let Inst{9-5} = Rn;
> - let Inst{4} = o3;
> - let Inst{3-0} = NZCVImm;
> -}
> -
> -// Format for conditional select instructions.
> -class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<4> Cond;
> -
> - let Inst{31} = sf;
> - let Inst{30} = op;
> - let Inst{29} = s;
> - let Inst{28-21} = 0b11010100;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = Cond;
> - let Inst{11-10} = op2;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for data processing (1 source) instructions
> -class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
> - string asmstr, dag outs, dag ins,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = 0b1;
> - let Inst{29} = S;
> - let Inst{28-21} = 0b11010110;
> - let Inst{20-16} = opcode2;
> - let Inst{15-10} = opcode;
> -}
> -
> -// Format for data processing (2 source) instructions
> -class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
> - string asmstr, dag outs, dag ins,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = 0b0;
> - let Inst{29} = S;
> - let Inst{28-21} = 0b11010110;
> - let Inst{15-10} = opcode;
> -}
> -
> -// Format for data-processing (3 source) instructions
> -
> -class A64I_dp3<bit sf, bits<6> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30-29} = opcode{5-4};
> - let Inst{28-24} = 0b11011;
> - let Inst{23-21} = opcode{3-1};
> - // Inherits Rm in 20-16
> - let Inst{15} = opcode{0};
> - // {14-10} mostly Ra, but unspecified for SMULH/UMULH
> - // Inherits Rn in 9-5
> - // Inherits Rd in 4-0
> -}
> -
> -// Format for exception generation instructions
> -class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<16> UImm16;
> -
> - let Inst{31-24} = 0b11010100;
> - let Inst{23-21} = opc;
> - let Inst{20-5} = UImm16;
> - let Inst{4-2} = op2;
> - let Inst{1-0} = ll;
> -}
> -
> -// Format for extract (immediate) instructions
> -class A64I_extract<bit sf, bits<3> op, bit n,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<6> LSB;
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = op{2-1};
> - let Inst{28-23} = 0b100111;
> - let Inst{22} = n;
> - let Inst{21} = op{0};
> - // Inherits Rm in bits 20-16
> - let Inst{15-10} = LSB;
> - // Inherits Rn in 9-5
> - // Inherits Rd in 4-0
> -}
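
The EXTR semantics noted earlier, "an LHS, an RHS and an LSB", amount to
pulling 64 contiguous bits out of the concatenation LHS:RHS. A sketch of
the 64-bit case, mine rather than the backend's:

    #include <cstdint>

    // Result is bits [LSB+63 : LSB] of the 128-bit value LHS:RHS.
    uint64_t extr64(uint64_t LHS, uint64_t RHS, unsigned LSB) {
      if (LSB == 0)  // avoid the undefined shift by 64 below
        return RHS;
      return (RHS >> LSB) | (LHS << (64 - LSB));
    }

extr64(x, x, r) is also how "ror x0, x1, #r" comes out, since ROR is an
alias of EXTR with both sources equal.
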
> -
> -let Predicates = [HasFPARMv8] in {
> -
> -// Format for floating-point compare instructions.
> -class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> Rm;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-16} = Rm;
> - let Inst{15-14} = op;
> - let Inst{13-10} = 0b1000;
> - let Inst{9-5} = Rn;
> - let Inst{4-0} = opcode2;
> -}
> -
> -// Format for floating-point conditional compare instructions.
> -class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rn;
> - bits<5> Rm;
> - bits<4> NZCVImm;
> - bits<4> Cond;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-16} = Rm;
> - let Inst{15-12} = Cond;
> - let Inst{11-10} = 0b01;
> - let Inst{9-5} = Rn;
> - let Inst{4} = op;
> - let Inst{3-0} = NZCVImm;
> -}
> -
> -// Format for floating-point conditional select instructions.
> -class A64I_fpcondsel<bit m, bit s, bits<2> type,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<4> Cond;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = Cond;
> - let Inst{11-10} = 0b11;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -
> -// Format for floating-point data-processing (1 source) instructions.
> -class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-15} = opcode;
> - let Inst{14-10} = 0b10000;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point data-processing (2 sources) instructions.
> -class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point data-processing (3 sources) instructions.
> -class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<5> Ra;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11111;
> - let Inst{23-22} = type;
> - let Inst{21} = o1;
> - // Inherit Rm in 20-16
> - let Inst{15} = o0;
> - let Inst{14-10} = Ra;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point <-> fixed-point conversion instructions.
> -class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<6> Scale;
> -
> - let Inst{31} = sf;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b0;
> - let Inst{20-19} = mode;
> - let Inst{18-16} = opcode;
> - let Inst{15-10} = Scale;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format for floating-point <-> integer conversion instructions.
> -class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = sf;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-19} = rmode;
> - let Inst{18-16} = opcode;
> - let Inst{15-10} = 0b000000;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -
> -// Format for floating-point immediate instructions.
> -class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - bits<8> Imm8;
> -
> - let Inst{31} = m;
> - let Inst{30} = 0b0;
> - let Inst{29} = s;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = type;
> - let Inst{21} = 0b1;
> - let Inst{20-13} = Imm8;
> - let Inst{12-10} = 0b100;
> - let Inst{9-5} = imm5;
> - // Inherit Rd in 4-0
> -}
> -
> -}
> -
> -// Format for load-register (literal) instructions.
> -class A64I_LDRlit<bits<2> opc, bit v,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - bits<19> Imm19;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b011;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-5} = Imm19;
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store exclusive instructions.
> -class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-30} = size;
> - let Inst{29-24} = 0b001000;
> - let Inst{23} = o2;
> - let Inst{22} = L;
> - let Inst{21} = o1;
> - let Inst{15} = o0;
> -}
> -
> -class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>:
> - A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt2;
> - let Inst{14-10} = Rt2;
> -}
> -
> -class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>:
> - A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
> - bits<5> Rs;
> - let Inst{20-16} = Rs;
> -}
> -
> -class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>:
> - A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
> - bits<5> Rt2;
> - let Inst{14-10} = Rt2;
> -}
> -
> -// Format for load-store register (immediate post-indexed) instructions
> -class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b01;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store register (immediate pre-indexed) instructions
> -class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b11;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store register (unprivileged) instructions
> -class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for load-store (unscaled immediate) instructions.
> -class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<9> SImm9;
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b0;
> - let Inst{20-12} = SImm9;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -
> -// Format for load-store (unsigned immediate) instructions.
> -class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<12> UImm12;
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b01;
> - let Inst{23-22} = opc;
> - let Inst{21-10} = UImm12;
> -}
> -
> -// Format for load-store register (register offset) instructions.
> -class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Rm;
> -
> - // Complex operand selection needed for these instructions, so they
> - // need an "addr" field for encoding/decoding to be generated.
> - bits<3> Ext;
> - // OptionHi = Ext{2-1}
> - // S = Ext{0}
> -
> - let Inst{31-30} = size;
> - let Inst{29-27} = 0b111;
> - let Inst{26} = v;
> - let Inst{25-24} = 0b00;
> - let Inst{23-22} = opc;
> - let Inst{21} = 0b1;
> - let Inst{20-16} = Rm;
> - let Inst{15-14} = Ext{2-1};
> - let Inst{13} = optionlo;
> - let Inst{12} = Ext{0};
> - let Inst{11-10} = 0b10;
> - // Inherits Rn in 9-5
> - // Inherits Rt in 4-0
> -
> - let AddedComplexity = 50;
> -}
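
To make the Ext packing above concrete: the architectural extend-type
"option" field is Ext{2-1} with the per-instruction optionlo bit as its
low bit, and the shift-enable S bit is Ext{0}. A small sketch with names
of my own choosing:

    struct RegOffFields { unsigned Option; bool S; };

    RegOffFields unpackExt(unsigned Ext /*3 bits*/, bool OptionLo) {
      RegOffFields F;
      F.Option = (((Ext >> 1) & 0x3) << 1) | (OptionLo ? 1u : 0u); // Inst{15-13}
      F.S = (Ext & 1) != 0;                                        // Inst{12}
      return F;
    }

The complex-pattern "addr" operand mentioned in the comment is what
supplies Ext during selection.
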
> -
> -// Format for Load-store register pair (offset) instructions
> -class A64I_LSPoffset<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b010;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Load-store register pair (post-indexed) instructions
> -class A64I_LSPpostind<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b001;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Load-store register pair (pre-indexed) instructions
> -class A64I_LSPpreind<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b011;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Load-store non-temporal register pair (offset) instructions
> -class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
> - bits<7> SImm7;
> -
> - let Inst{31-30} = opc;
> - let Inst{29-27} = 0b101;
> - let Inst{26} = v;
> - let Inst{25-23} = 0b000;
> - let Inst{22} = l;
> - let Inst{21-15} = SImm7;
> - // Inherit Rt2 in 14-10
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format for Logical (immediate) instructions
> -class A64I_logicalimm<bit sf, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bit N;
> - bits<6> ImmR;
> - bits<6> ImmS;
> -
> - // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
> - // selection), so we'll combine them into a single field here.
> - bits<13> Imm;
> - // N = Imm{12};
> - // ImmR = Imm{11-6};
> - // ImmS = Imm{5-0};
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-23} = 0b100100;
> - let Inst{22} = Imm{12};
> - let Inst{21-16} = Imm{11-6};
> - let Inst{15-10} = Imm{5-0};
> - // Rn inherited in 9-5
> - // Rd inherited in 4-0
> -}
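
The N/ImmR/ImmS packing above is easy to get backwards, so spelled out as
code; this is my own restatement of the three commented equations:

    struct LogicalImm { bool N; unsigned ImmR, ImmS; };

    LogicalImm splitLogicalImm(unsigned Imm /*13 bits*/) {
      LogicalImm L;
      L.N    = ((Imm >> 12) & 1) != 0; // Inst{22}
      L.ImmR = (Imm >> 6) & 0x3f;      // Inst{21-16}
      L.ImmS = Imm & 0x3f;             // Inst{15-10}
      return L;
    }

Nothing here validates that the 13 bits name a legal bitmask immediate;
that check lives elsewhere.
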
> -
> -// Format for Logical (shifted register) instructions
> -class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - bits<6> Imm6;
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-24} = 0b01010;
> - let Inst{23-22} = shift;
> - let Inst{21} = N;
> - // Rm inherited
> - let Inst{15-10} = Imm6;
> - // Rn inherited
> - // Rd inherited
> -}
> -
> -// Format for Move wide (immediate)
> -class A64I_movw<bit sf, bits<2> opc,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - bits<16> UImm16;
> - bits<2> Shift; // Called "hw" officially
> -
> - let Inst{31} = sf;
> - let Inst{30-29} = opc;
> - let Inst{28-23} = 0b100101;
> - let Inst{22-21} = Shift;
> - let Inst{20-5} = UImm16;
> - // Inherits Rd in 4-0
> -}
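
This is the format behind the WrapperLarge lowering mentioned back in the
ISelLowering header: one MOVZ plus up to three MOVKs, each dropping a
16-bit chunk at hw*16, hw being the Shift field here. An illustrative
sketch of the full-width sequence, not code from the tree:

    #include <cstdint>
    #include <cstdio>

    void printMovSequence(uint64_t Value) {
      for (unsigned HW = 0; HW != 4; ++HW) {
        unsigned Chunk = unsigned(uint16_t(Value >> (HW * 16)));
        std::printf("%s x0, #%u, lsl #%u\n",
                    HW == 0 ? "movz" : "movk", Chunk, HW * 16);
      }
    }

A real materialiser would skip all-zero chunks after the first; the fixed
four-instruction form is what the large-model relocations expect.
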
> -
> -// Format for PC-relative addressing instructions, ADR and ADRP.
> -class A64I_PCADR<bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs, ins, asmstr, patterns, itin> {
> - bits<21> Label;
> -
> - let Inst{31} = op;
> - let Inst{30-29} = Label{1-0};
> - let Inst{28-24} = 0b10000;
> - let Inst{23-5} = Label{20-2};
> -}
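
The odd-looking Label split, Label{1-0} up in bits 30-29 and Label{20-2}
in bits 23-5, reassembles to a 21-bit signed offset. A decoding sketch of
my own:

    #include <cstdint>

    int64_t decodeADRLabel(uint32_t Inst) {
      uint64_t ImmLo = (Inst >> 29) & 0x3;     // Label{1-0}
      uint64_t ImmHi = (Inst >> 5) & 0x7ffff;  // Label{20-2}
      uint64_t Raw = (ImmHi << 2) | ImmLo;
      return (int64_t)(Raw << 43) >> 43;       // sign-extend 21 bits
    }

For ADR the result is a byte offset from the PC; for ADRP it is scaled by
4096 and added to the PC with its low 12 bits cleared, which is what makes
the adrp/add pairs of the small memory model work.
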
> -
> -// Format for system instructions
> -class A64I_system<bit l,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - bits<2> Op0;
> - bits<3> Op1;
> - bits<4> CRn;
> - bits<4> CRm;
> - bits<3> Op2;
> - bits<5> Rt;
> -
> - let Inst{31-22} = 0b1101010100;
> - let Inst{21} = l;
> - let Inst{20-19} = Op0;
> - let Inst{18-16} = Op1;
> - let Inst{15-12} = CRn;
> - let Inst{11-8} = CRm;
> - let Inst{7-5} = Op2;
> - let Inst{4-0} = Rt;
> -
> - // These instructions can do horrible things.
> - let hasSideEffects = 1;
> -}
> -
> -// Format for unconditional branch (immediate) instructions
> -class A64I_Bimm<bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - // Doubly special in not sharing any register fields with other
> - // instructions; the only operand is the 26-bit branch label itself.
> - bits<26> Label;
> -
> - let Inst{31} = op;
> - let Inst{30-26} = 0b00101;
> - let Inst{25-0} = Label;
> -}
> -
> -// Format for Test & branch (immediate) instructions
> -class A64I_TBimm<bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRt<outs, ins, asmstr, patterns, itin> {
> - // Shares only Rt with the rest of the hierarchy; the tested bit number
> - // and the branch label get dedicated fields of their own.
> - bits<6> Imm;
> - bits<14> Label;
> -
> - let Inst{31} = Imm{5};
> - let Inst{30-25} = 0b011011;
> - let Inst{24} = op;
> - let Inst{23-19} = Imm{4-0};
> - let Inst{18-5} = Label;
> - // Inherit Rt in 4-0
> -}
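
The 6-bit tested-bit number is likewise split, with its top bit doing
double duty as the register-width selector in Inst{31}. A sketch of the
reassembly, mine rather than the tree's:

    #include <cstdint>

    unsigned tbBitNumber(uint32_t Inst) {
      return (((Inst >> 31) & 1) << 5)  // Imm{5}, Inst{31}
           | ((Inst >> 19) & 0x1f);     // Imm{4-0}, Inst{23-19}
    }

A bit number of 32 or above thus forces the 64-bit form of TBZ/TBNZ.
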
> -
> -// Format for Unconditional branch (register) instructions, including
> -// RET. Shares no fields with instructions further up the hierarchy, so
> -// it sits at the top level.
> -class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64Inst<outs, ins, asmstr, patterns, itin> {
> - // Doubly special in not even sharing register fields with other
> - // instructions, so we create our own Rn here.
> - bits<5> Rn;
> -
> - let Inst{31-25} = 0b1101011;
> - let Inst{24-21} = opc;
> - let Inst{20-16} = op2;
> - let Inst{15-10} = op3;
> - let Inst{9-5} = Rn;
> - let Inst{4-0} = op4;
> -}
> -
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// Neon Instruction Format Definitions.
> -//
> -
> -let Predicates = [HasNEON] in {
> -
> -class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
> - : InstAlias<Asm, Result, Emit> {
> -}
> -
> -// Format AdvSIMD bitwise extract
> -class NeonI_BitExtract<bit q, bits<2> op2,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-24} = 0b101110;
> - let Inst{23-22} = op2;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - // imm4 in 14-11
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD perm
> -class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-24} = 0b001110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - let Inst{14-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD table lookup
> -class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-24} = 0b001110;
> - let Inst{23-22} = op2;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - let Inst{14-13} = len;
> - let Inst{12} = op;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 3 vector registers with same vector type
> -class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 3 vector registers with different vector type
> -class NeonI_3VDiff<bit q, bit u, bits<2> size, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11} = 0b0;
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD two registers and an element
> -class NeonI_2VElem<bit q, bit u, bits<2> size, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01111;
> - let Inst{23-22} = size;
> - // l in Inst{21}
> - // m in Inst{20}
> - // Inherit Rm in 19-16
> - let Inst{15-12} = opcode;
> - // h in Inst{11}
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 1 vector register with modified immediate
> -class NeonI_1VModImm<bit q, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRd<outs,ins, asmstr, patterns, itin> {
> - bits<8> Imm;
> - bits<4> cmode;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = op;
> - let Inst{28-19} = 0b0111100000;
> - let Inst{15-12} = cmode;
> - let Inst{11} = 0b0; // o2
> - let Inst{10} = 1;
> - // Inherit Rd in 4-0
> - let Inst{18-16} = Imm{7-5}; // imm a:b:c
> - let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h
> -}
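
The a:b:c / d:e:f:g:h comments describe the standard AdvSIMD split of the
8-bit modified immediate. Spelled out, as my own restatement:

    #include <cstdint>

    void splitModImm(uint8_t Imm, unsigned &ABC, unsigned &DEFGH) {
      ABC   = (Imm >> 5) & 0x7;  // Inst{18-16}
      DEFGH = Imm & 0x1f;        // Inst{9-5}
    }

How the 8 bits then expand into a full vector constant is governed by the
cmode and op fields.
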
> -
> -// Format AdvSIMD 3 scalar registers with same type
> -
> -class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -
> -// Format AdvSIMD 2 vector registers miscellaneous
> -class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 2 vector 1 immediate shift
> -class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<7> Imm;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-23} = 0b011110;
> - let Inst{22-16} = Imm;
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD duplicate and insert
> -class NeonI_copy<bit q, bit op, bits<4> imm4,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Imm5;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = op;
> - let Inst{28-21} = 0b01110000;
> - let Inst{20-16} = Imm5;
> - let Inst{15} = 0b0;
> - let Inst{14-11} = imm4;
> - let Inst{10} = 0b1;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -// Format AdvSIMD insert from element to vector
> -class NeonI_insert<bit q, bit op,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<5> Imm5;
> - bits<4> Imm4;
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = op;
> - let Inst{28-21} = 0b01110000;
> - let Inst{20-16} = Imm5;
> - let Inst{15} = 0b0;
> - let Inst{14-11} = Imm4;
> - let Inst{10} = 0b1;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar pairwise
> -class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b11000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD 2 vector across lanes
> -class NeonI_2VAcross<bit q, bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b01110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b11000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar two registers miscellaneous
> -class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10000;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD vector load/store multiple N-element structure
> -class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011000;
> - let Inst{22} = l;
> - let Inst{21-16} = 0b000000;
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = size;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD vector load/store multiple N-element structure (post-index)
> -class NeonI_LdStMult_Post<bit q, bit l, bits<4> opcode, bits<2> size,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtnm<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011001;
> - let Inst{22} = l;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = size;
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD vector load Single N-element structure to all lanes
> -class NeonI_LdOne_Dup<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
> - dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011010;
> - let Inst{22} = 0b1;
> - let Inst{21} = r;
> - let Inst{20-16} = 0b00000;
> - let Inst{15-13} = opcode;
> - let Inst{12} = 0b0;
> - let Inst{11-10} = size;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD vector load/store Single N-element structure to/from one lane
> -class NeonI_LdStOne_Lane<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
> - dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtn<outs, ins, asmstr, patterns, itin>
> -{
> - bits<4> lane;
> - let Inst{31} = 0b0;
> - let Inst{29-23} = 0b0011010;
> - let Inst{22} = l;
> - let Inst{21} = r;
> - let Inst{20-16} = 0b00000;
> - let Inst{15-14} = op2_1;
> - let Inst{13} = op0;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD post-index vector load Single N-element structure to all lanes
> -class NeonI_LdOne_Dup_Post<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
> - dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRtnm<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = q;
> - let Inst{29-23} = 0b0011011;
> - let Inst{22} = 0b1;
> - let Inst{21} = r;
> - // Inherit Rm in 20-16
> - let Inst{15-13} = opcode;
> - let Inst{12} = 0b0;
> - let Inst{11-10} = size;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD post-index vector load/store Single N-element structure
> -// to/from one lane
> -class NeonI_LdStOne_Lane_Post<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
> - dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRtnm<outs, ins, asmstr, patterns, itin>
> -{
> - bits<4> lane;
> - let Inst{31} = 0b0;
> - let Inst{29-23} = 0b0011011;
> - let Inst{22} = l;
> - let Inst{21} = r;
> - // Inherit Rm in 20-16
> - let Inst{15-14} = op2_1;
> - let Inst{13} = op0;
> -
> - // Inherit Rn in 9-5
> - // Inherit Rt in 4-0
> -}
> -
> -// Format AdvSIMD 3 scalar registers with different type
> -
> -class NeonI_Scalar3Diff<bit u, bits<2> size, bits<4> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-30} = 0b01;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b1;
> - // Inherit Rm in 20-16
> - let Inst{15-12} = opcode;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar shift by immediate
> -
> -class NeonI_ScalarShiftImm<bit u, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - bits<4> Imm4;
> - bits<3> Imm3;
> - let Inst{31-30} = 0b01;
> - let Inst{29} = u;
> - let Inst{28-23} = 0b111110;
> - let Inst{22-19} = Imm4;
> - let Inst{18-16} = Imm3;
> - let Inst{15-11} = opcode;
> - let Inst{10} = 0b1;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD crypto AES
> -class NeonI_Crypto_AES<bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-24} = 0b01001110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10100;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD crypto SHA
> -class NeonI_Crypto_SHA<bits<2> size, bits<5> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdn<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-24} = 0b01011110;
> - let Inst{23-22} = size;
> - let Inst{21-17} = 0b10100;
> - let Inst{16-12} = opcode;
> - let Inst{11-10} = 0b10;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD crypto 3V SHA
> -class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode,
> - dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
> - let Inst{31-24} = 0b01011110;
> - let Inst{23-22} = size;
> - let Inst{21} = 0b0;
> - // Inherit Rm in 20-16
> - let Inst{15} = 0b0;
> - let Inst{14-12} = opcode;
> - let Inst{11-10} = 0b00;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -
> -// Format AdvSIMD scalar x indexed element
> -class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo,
> - bits<4> opcode, dag outs, dag ins,
> - string asmstr, list<dag> patterns,
> - InstrItinClass itin>
> - : A64InstRdnm<outs, ins, asmstr, patterns, itin>
> -{
> - let Inst{31} = 0b0;
> - let Inst{30} = 0b1;
> - let Inst{29} = u;
> - let Inst{28-24} = 0b11111;
> - let Inst{23} = szhi;
> - let Inst{22} = szlo;
> - // l in Inst{21}
> - // m in Instr{20}
> - // Inherit Rm in 19-16
> - let Inst{15-12} = opcode;
> - // h in Inst{11}
> - let Inst{10} = 0b0;
> - // Inherit Rn in 9-5
> - // Inherit Rd in 4-0
> -}
> -// Format AdvSIMD scalar copy - insert from element to scalar
> -class NeonI_ScalarCopy<dag outs, dag ins, string asmstr,
> - list<dag> patterns, InstrItinClass itin>
> - : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> {
> - let Inst{28} = 0b1;
> -}
> -}
> -
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (removed)
> @@ -1,979 +0,0 @@
> -//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the AArch64 implementation of the TargetInstrInfo class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#include "AArch64.h"
> -#include "AArch64InstrInfo.h"
> -#include "AArch64MachineFunctionInfo.h"
> -#include "AArch64TargetMachine.h"
> -#include "MCTargetDesc/AArch64MCTargetDesc.h"
> -#include "Utils/AArch64BaseInfo.h"
> -#include "llvm/CodeGen/MachineConstantPool.h"
> -#include "llvm/CodeGen/MachineDominators.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -#include "llvm/CodeGen/MachineFunctionPass.h"
> -#include "llvm/CodeGen/MachineInstrBuilder.h"
> -#include "llvm/CodeGen/MachineRegisterInfo.h"
> -#include "llvm/IR/Function.h"
> -#include "llvm/Support/ErrorHandling.h"
> -#include "llvm/Support/TargetRegistry.h"
> -#include <algorithm>
> -
> -using namespace llvm;
> -
> -#define GET_INSTRINFO_CTOR_DTOR
> -#include "AArch64GenInstrInfo.inc"
> -
> -AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
> - : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
> - Subtarget(STI) {}
> -
> -void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I, DebugLoc DL,
> - unsigned DestReg, unsigned SrcReg,
> - bool KillSrc) const {
> - unsigned Opc = 0;
> - unsigned ZeroReg = 0;
> - if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
> - // E.g. ADD xDst, xsp, #0 (, lsl #0)
> - BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
> - .addReg(SrcReg)
> - .addImm(0);
> - return;
> - } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
> - // E.g. ADD wDST, wsp, #0 (, lsl #0)
> - BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
> - .addReg(SrcReg)
> - .addImm(0);
> - return;
> - } else if (DestReg == AArch64::NZCV) {
> - assert(AArch64::GPR64RegClass.contains(SrcReg));
> - // E.g. MSR NZCV, xDST
> - BuildMI(MBB, I, DL, get(AArch64::MSRix))
> - .addImm(A64SysReg::NZCV)
> - .addReg(SrcReg);
> - } else if (SrcReg == AArch64::NZCV) {
> - assert(AArch64::GPR64RegClass.contains(DestReg));
> - // E.g. MRS xDST, NZCV
> - BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
> - .addImm(A64SysReg::NZCV);
> - } else if (AArch64::GPR64RegClass.contains(DestReg)) {
> - if (AArch64::GPR64RegClass.contains(SrcReg)) {
> - Opc = AArch64::ORRxxx_lsl;
> - ZeroReg = AArch64::XZR;
> - } else {
> - assert(AArch64::FPR64RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::GPR32RegClass.contains(DestReg)) {
> - if (AArch64::GPR32RegClass.contains(SrcReg)) {
> - Opc = AArch64::ORRwww_lsl;
> - ZeroReg = AArch64::WZR;
> - } else {
> - assert(AArch64::FPR32RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::FPR32RegClass.contains(DestReg)) {
> - if (AArch64::FPR32RegClass.contains(SrcReg)) {
> - BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
> - .addReg(SrcReg);
> - return;
> - } else {
> - assert(AArch64::GPR32RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::FPR64RegClass.contains(DestReg)) {
> - if (AArch64::FPR64RegClass.contains(SrcReg)) {
> - BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
> - .addReg(SrcReg);
> - return;
> - } else {
> - assert(AArch64::GPR64RegClass.contains(SrcReg));
> - BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
> - .addReg(SrcReg);
> - return;
> - }
> - } else if (AArch64::FPR128RegClass.contains(DestReg)) {
> - assert(AArch64::FPR128RegClass.contains(SrcReg));
> -
> - // If NEON is enabled, use ORR to implement this copy.
> - // If NEON isn't available, emit an STR/LDR pair through the stack instead.
> - if (getSubTarget().hasNEON()) {
> - BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
> - .addReg(SrcReg)
> - .addReg(SrcReg);
> - return;
> - } else {
> - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
> - .addReg(SrcReg)
> - .addReg(AArch64::XSP)
> - .addImm(0x1ff & -16);
> -
> - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
> - .addReg(AArch64::XSP, RegState::Define)
> - .addReg(AArch64::XSP)
> - .addImm(16);
> - return;
> - }
> - } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) {
> - // A copy between two FPR8 registers is implemented as a copy between their
> - // FPR32 super-registers.
> - const TargetRegisterInfo *TRI = &getRegisterInfo();
> - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8,
> - &AArch64::FPR32RegClass);
> - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8,
> - &AArch64::FPR32RegClass);
> - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
> - .addReg(Src);
> - return;
> - } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
> - // A copy between two FPR16 registers is implemented as a copy between their
> - // FPR32 super-registers.
> - const TargetRegisterInfo *TRI = &getRegisterInfo();
> - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
> - &AArch64::FPR32RegClass);
> - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
> - &AArch64::FPR32RegClass);
> - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
> - .addReg(Src);
> - return;
> - } else {
> - CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
> - return;
> - }
> -
> - // E.g. ORR xDst, xzr, xSrc, lsl #0
> - BuildMI(MBB, I, DL, get(Opc), DestReg)
> - .addReg(ZeroReg)
> - .addReg(SrcReg)
> - .addImm(0);
> -}
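
The non-NEON FPR128 fallback above bounces the value through the stack with
a pre-indexed store and a post-indexed load. The "0x1ff & -16" immediate is
the signed 9-bit (imm9) two's-complement encoding of the -16 byte offset. A
standalone sketch of that masking (plain C++; encodeImm9 is a hypothetical
helper, not an LLVM API):

  #include <cassert>
  #include <cstdint>

  // Mask a byte offset down to the 9-bit field used by pre/post-indexed
  // loads and stores.
  uint32_t encodeImm9(int32_t Offset) {
    assert(Offset >= -256 && Offset <= 255 && "imm9 out of range");
    return static_cast<uint32_t>(Offset) & 0x1ff;
  }

  int main() {
    assert(encodeImm9(-16) == 0x1f0); // the same value as 0x1ff & -16
    return 0;
  }
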
> -
> -void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I,
> - DebugLoc DL, unsigned DestReg,
> - unsigned SrcReg) const {
> - unsigned SubRegs;
> - bool IsQRegs;
> - if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 2;
> - IsQRegs = false;
> - } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 3;
> - IsQRegs = false;
> - } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 4;
> - IsQRegs = false;
> - } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 2;
> - IsQRegs = true;
> - } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 3;
> - IsQRegs = true;
> - } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) {
> - SubRegs = 4;
> - IsQRegs = true;
> - } else
> - llvm_unreachable("Unknown register class");
> -
> - unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0;
> - int Spacing = 1;
> - const TargetRegisterInfo *TRI = &getRegisterInfo();
> - // Copy register tuples backward when the first Dest reg overlaps
> - // with SrcReg.
> - if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
> - BeginIdx = BeginIdx + (SubRegs - 1);
> - Spacing = -1;
> - }
> -
> - unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B;
> - for (unsigned i = 0; i != SubRegs; ++i) {
> - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
> - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
> - assert(Dst && Src && "Bad sub-register");
> - BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
> - .addReg(Src)
> - .addReg(Src);
> - }
> - return;
> -}
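
The backward-copy rule in CopyPhysRegTuple is easiest to see with a concrete
overlap. A minimal sketch (plain C++, registers modelled as an array;
nothing here is LLVM API):

  #include <cassert>

  int main() {
    // Model D0..D3. Copy the tuple {D1,D2} (dest) from {D0,D1} (src):
    // dest's first lane D1 overlaps the source tuple.
    int R[4] = {10, 20, 30, 40};
    const int DstBase = 1, SrcBase = 0, SubRegs = 2;

    // Backward copy, as the code above does once it detects overlap.
    for (int i = SubRegs - 1; i >= 0; --i)
      R[DstBase + i] = R[SrcBase + i];
    assert(R[1] == 10 && R[2] == 20); // D1 = old D0, D2 = old D1

    // A forward copy would have written D1 before reading it,
    // leaving D2 == 10 instead of 20.
    return 0;
  }
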
> -
> -/// Does the Opcode represent a conditional branch that we can remove and re-add
> -/// at the end of a basic block?
> -static bool isCondBranch(unsigned Opc) {
> - return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
> - Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
> - Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
> - Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
> -}
> -
> -/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
> -/// setting TBB to the destination basic block and populating the Cond vector
> -/// with data necessary to recreate the conditional branch at a later
> -/// date. First element will be the opcode, and subsequent ones define the
> -/// conditions being branched on in an instruction-specific manner.
> -static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
> - SmallVectorImpl<MachineOperand> &Cond) {
> - switch (I->getOpcode()) {
> - case AArch64::Bcc:
> - case AArch64::CBZw:
> - case AArch64::CBZx:
> - case AArch64::CBNZw:
> - case AArch64::CBNZx:
> - // These instructions just have one predicate operand in position 0 (either
> - // a condition code or a register being compared).
> - Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
> - Cond.push_back(I->getOperand(0));
> - TBB = I->getOperand(1).getMBB();
> - return;
> - case AArch64::TBZwii:
> - case AArch64::TBZxii:
> - case AArch64::TBNZwii:
> - case AArch64::TBNZxii:
> - // These have two predicate operands: a register and a bit position.
> - Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
> - Cond.push_back(I->getOperand(0));
> - Cond.push_back(I->getOperand(1));
> - TBB = I->getOperand(2).getMBB();
> - return;
> - default:
> - llvm_unreachable("Unknown conditional branch to classify");
> - }
> -}
> -
> -
> -bool
> -AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
> - MachineBasicBlock *&FBB,
> - SmallVectorImpl<MachineOperand> &Cond,
> - bool AllowModify) const {
> - // If the block has no terminators, it just falls into the block after it.
> - MachineBasicBlock::iterator I = MBB.end();
> - if (I == MBB.begin())
> - return false;
> - --I;
> - while (I->isDebugValue()) {
> - if (I == MBB.begin())
> - return false;
> - --I;
> - }
> - if (!isUnpredicatedTerminator(I))
> - return false;
> -
> - // Get the last instruction in the block.
> - MachineInstr *LastInst = I;
> -
> - // If there is only one terminator instruction, process it.
> - unsigned LastOpc = LastInst->getOpcode();
> - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
> - if (LastOpc == AArch64::Bimm) {
> - TBB = LastInst->getOperand(0).getMBB();
> - return false;
> - }
> - if (isCondBranch(LastOpc)) {
> - classifyCondBranch(LastInst, TBB, Cond);
> - return false;
> - }
> - return true; // Can't handle indirect branch.
> - }
> -
> - // Get the instruction before it if it is a terminator.
> - MachineInstr *SecondLastInst = I;
> - unsigned SecondLastOpc = SecondLastInst->getOpcode();
> -
> - // If AllowModify is true and the block ends with two or more unconditional
> - // branches, delete all but the first unconditional branch.
> - if (AllowModify && LastOpc == AArch64::Bimm) {
> - while (SecondLastOpc == AArch64::Bimm) {
> - LastInst->eraseFromParent();
> - LastInst = SecondLastInst;
> - LastOpc = LastInst->getOpcode();
> - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
> - // Return now that the only terminator is an unconditional branch.
> - TBB = LastInst->getOperand(0).getMBB();
> - return false;
> - } else {
> - SecondLastInst = I;
> - SecondLastOpc = SecondLastInst->getOpcode();
> - }
> - }
> - }
> -
> - // If there are three terminators, we don't know what sort of block this is.
> - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
> - return true;
> -
> - // If the block ends with a B and a Bcc, handle it.
> - if (LastOpc == AArch64::Bimm) {
> - if (SecondLastOpc == AArch64::Bcc) {
> - TBB = SecondLastInst->getOperand(1).getMBB();
> - Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
> - Cond.push_back(SecondLastInst->getOperand(0));
> - FBB = LastInst->getOperand(0).getMBB();
> - return false;
> - } else if (isCondBranch(SecondLastOpc)) {
> - classifyCondBranch(SecondLastInst, TBB, Cond);
> - FBB = LastInst->getOperand(0).getMBB();
> - return false;
> - }
> - }
> -
> - // If the block ends with two unconditional branches, handle it. The second
> - // one is not executed, so remove it.
> - if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
> - TBB = SecondLastInst->getOperand(0).getMBB();
> - I = LastInst;
> - if (AllowModify)
> - I->eraseFromParent();
> - return false;
> - }
> -
> - // Otherwise, can't handle this.
> - return true;
> -}
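
For readers less familiar with AnalyzeBranch's contract, here is a compact
restatement of the block shapes the function accepts (a sketch inferred from
the code above, not an LLVM type):

  // Anything not matching one of these makes AnalyzeBranch return true
  // ("cannot analyze").
  enum class BlockShape {
    Fallthrough,    // no terminators: TBB and FBB stay null
    Uncond,         // "B dest":       TBB = dest, Cond empty
    Cond,           // "Bcc/CBZ/...":  TBB = dest, Cond filled in
    CondThenUncond, // "Bcc t; B f":   TBB = t, FBB = f, Cond filled in
  };
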
> -
> -bool AArch64InstrInfo::ReverseBranchCondition(
> - SmallVectorImpl<MachineOperand> &Cond) const {
> - switch (Cond[0].getImm()) {
> - case AArch64::Bcc: {
> - A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
> - CC = A64InvertCondCode(CC);
> - Cond[1].setImm(CC);
> - return false;
> - }
> - case AArch64::CBZw:
> - Cond[0].setImm(AArch64::CBNZw);
> - return false;
> - case AArch64::CBZx:
> - Cond[0].setImm(AArch64::CBNZx);
> - return false;
> - case AArch64::CBNZw:
> - Cond[0].setImm(AArch64::CBZw);
> - return false;
> - case AArch64::CBNZx:
> - Cond[0].setImm(AArch64::CBZx);
> - return false;
> - case AArch64::TBZwii:
> - Cond[0].setImm(AArch64::TBNZwii);
> - return false;
> - case AArch64::TBZxii:
> - Cond[0].setImm(AArch64::TBNZxii);
> - return false;
> - case AArch64::TBNZwii:
> - Cond[0].setImm(AArch64::TBZwii);
> - return false;
> - case AArch64::TBNZxii:
> - Cond[0].setImm(AArch64::TBZxii);
> - return false;
> - default:
> - llvm_unreachable("Unknown branch type");
> - }
> -}
> -
> -
> -unsigned
> -AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
> - MachineBasicBlock *FBB,
> - const SmallVectorImpl<MachineOperand> &Cond,
> - DebugLoc DL) const {
> - if (!FBB && Cond.empty()) {
> - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
> - return 1;
> - } else if (!FBB) {
> - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
> - for (int i = 1, e = Cond.size(); i != e; ++i)
> - MIB.addOperand(Cond[i]);
> - MIB.addMBB(TBB);
> - return 1;
> - }
> -
> - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
> - for (int i = 1, e = Cond.size(); i != e; ++i)
> - MIB.addOperand(Cond[i]);
> - MIB.addMBB(TBB);
> -
> - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
> - return 2;
> -}
> -
> -unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
> - MachineBasicBlock::iterator I = MBB.end();
> - if (I == MBB.begin()) return 0;
> - --I;
> - while (I->isDebugValue()) {
> - if (I == MBB.begin())
> - return 0;
> - --I;
> - }
> - if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
> - return 0;
> -
> - // Remove the branch.
> - I->eraseFromParent();
> -
> - I = MBB.end();
> -
> - if (I == MBB.begin()) return 1;
> - --I;
> - if (!isCondBranch(I->getOpcode()))
> - return 1;
> -
> - // Remove the branch.
> - I->eraseFromParent();
> - return 2;
> -}
> -
> -bool
> -AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
> - MachineInstr &MI = *MBBI;
> - MachineBasicBlock &MBB = *MI.getParent();
> -
> - unsigned Opcode = MI.getOpcode();
> - switch (Opcode) {
> - case AArch64::TLSDESC_BLRx: {
> - MachineInstr *NewMI =
> - BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
> - .addOperand(MI.getOperand(1));
> - MI.setDesc(get(AArch64::BLRx));
> -
> - llvm::finalizeBundle(MBB, NewMI, *++MBBI);
> - return true;
> - }
> - default:
> - return false;
> - }
> -
> - return false;
> -}
> -
> -void
> -AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - unsigned SrcReg, bool isKill,
> - int FrameIdx,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const {
> - DebugLoc DL = MBB.findDebugLoc(MBBI);
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - unsigned Align = MFI.getObjectAlignment(FrameIdx);
> -
> - MachineMemOperand *MMO
> - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
> - MachineMemOperand::MOStore,
> - MFI.getObjectSize(FrameIdx),
> - Align);
> -
> - unsigned StoreOp = 0;
> - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
> - switch (RC->getSize()) {
> - case 4: StoreOp = AArch64::LS32_STR; break;
> - case 8: StoreOp = AArch64::LS64_STR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
> - StoreOp = AArch64::LSFP8_STR;
> - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
> - StoreOp = AArch64::LSFP16_STR;
> - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
> - RC->hasType(MVT::f128)) {
> - switch (RC->getSize()) {
> - case 4: StoreOp = AArch64::LSFP32_STR; break;
> - case 8: StoreOp = AArch64::LSFP64_STR; break;
> - case 16: StoreOp = AArch64::LSFP128_STR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else { // For a super-register class with more than one sub-register.
> - if (AArch64::DPairRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x2_8B;
> - else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x3_8B;
> - else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x4_8B;
> - else if (AArch64::QPairRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x2_16B;
> - else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x3_16B;
> - else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
> - StoreOp = AArch64::ST1x4_16B;
> - else
> - llvm_unreachable("Unknown reg class");
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
> - // Vector stores take different operands from other store instructions.
> - NewMI.addFrameIndex(FrameIdx)
> - .addReg(SrcReg, getKillRegState(isKill))
> - .addMemOperand(MMO);
> - return;
> - }
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
> - NewMI.addReg(SrcReg, getKillRegState(isKill))
> - .addFrameIndex(FrameIdx)
> - .addImm(0)
> - .addMemOperand(MMO);
> -
> -}
> -
> -void
> -AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - unsigned DestReg, int FrameIdx,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const {
> - DebugLoc DL = MBB.findDebugLoc(MBBI);
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> - unsigned Align = MFI.getObjectAlignment(FrameIdx);
> -
> - MachineMemOperand *MMO
> - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
> - MachineMemOperand::MOLoad,
> - MFI.getObjectSize(FrameIdx),
> - Align);
> -
> - unsigned LoadOp = 0;
> - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
> - switch (RC->getSize()) {
> - case 4: LoadOp = AArch64::LS32_LDR; break;
> - case 8: LoadOp = AArch64::LS64_LDR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
> - LoadOp = AArch64::LSFP8_LDR;
> - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
> - LoadOp = AArch64::LSFP16_LDR;
> - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
> - RC->hasType(MVT::f128)) {
> - switch (RC->getSize()) {
> - case 4: LoadOp = AArch64::LSFP32_LDR; break;
> - case 8: LoadOp = AArch64::LSFP64_LDR; break;
> - case 16: LoadOp = AArch64::LSFP128_LDR; break;
> - default:
> - llvm_unreachable("Unknown size for regclass");
> - }
> - } else { // For a super-register class with more than one sub-register.
> - if (AArch64::DPairRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x2_8B;
> - else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x3_8B;
> - else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x4_8B;
> - else if (AArch64::QPairRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x2_16B;
> - else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x3_16B;
> - else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
> - LoadOp = AArch64::LD1x4_16B;
> - else
> - llvm_unreachable("Unknown reg class");
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
> - // Vector loads take different operands from other load instructions.
> - NewMI.addFrameIndex(FrameIdx)
> - .addMemOperand(MMO);
> - return;
> - }
> -
> - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
> - NewMI.addFrameIndex(FrameIdx)
> - .addImm(0)
> - .addMemOperand(MMO);
> -}
> -
> -unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
> - unsigned Limit = (1 << 16) - 1;
> - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
> - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
> - I != E; ++I) {
> - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
> - if (!I->getOperand(i).isFI()) continue;
> -
> - // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
> - // is the largest offset guaranteed to fit in the immediate offset.
> - if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
> - Limit = std::min(Limit, 0xfffu);
> - break;
> - }
> -
> - int AccessScale, MinOffset, MaxOffset;
> - getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
> - Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
> -
> - break; // At most one FI per instruction
> - }
> - }
> - }
> -
> - return Limit;
> -}
> -
> -void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
> - int &AccessScale, int &MinOffset,
> - int &MaxOffset) const {
> - switch (MI.getOpcode()) {
> - default:
> - llvm_unreachable("Unknown load/store kind");
> - case TargetOpcode::DBG_VALUE:
> - AccessScale = 1;
> - MinOffset = INT_MIN;
> - MaxOffset = INT_MAX;
> - return;
> - case AArch64::LS8_LDR: case AArch64::LS8_STR:
> - case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
> - case AArch64::LDRSBw:
> - case AArch64::LDRSBx:
> - AccessScale = 1;
> - MinOffset = 0;
> - MaxOffset = 0xfff;
> - return;
> - case AArch64::LS16_LDR: case AArch64::LS16_STR:
> - case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
> - case AArch64::LDRSHw:
> - case AArch64::LDRSHx:
> - AccessScale = 2;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LS32_LDR: case AArch64::LS32_STR:
> - case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
> - case AArch64::LDRSWx:
> - case AArch64::LDPSWx:
> - AccessScale = 4;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LS64_LDR: case AArch64::LS64_STR:
> - case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
> - case AArch64::PRFM:
> - AccessScale = 8;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
> - AccessScale = 16;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
> - case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
> - AccessScale = 4;
> - MinOffset = -0x40 * AccessScale;
> - MaxOffset = 0x3f * AccessScale;
> - return;
> - case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
> - case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
> - AccessScale = 8;
> - MinOffset = -0x40 * AccessScale;
> - MaxOffset = 0x3f * AccessScale;
> - return;
> - case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
> - AccessScale = 16;
> - MinOffset = -0x40 * AccessScale;
> - MaxOffset = 0x3f * AccessScale;
> - return;
> - case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
> - AccessScale = 16;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
> - AccessScale = 24;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
> - case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
> - AccessScale = 32;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
> - AccessScale = 48;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
> - AccessScale = 64;
> - MinOffset = 0;
> - MaxOffset = 0xfff * AccessScale;
> - return;
> - }
> -}
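
Putting the two halves together: an immediate offset is legal for one of
these instructions iff it lies in [MinOffset, MaxOffset] and is a multiple
of AccessScale. A standalone check of that contract (plain C++;
isLegalOffset is illustrative, not an LLVM API):

  #include <cassert>

  bool isLegalOffset(int Imm, int AccessScale, int MinOffset, int MaxOffset) {
    return Imm >= MinOffset && Imm <= MaxOffset && Imm % AccessScale == 0;
  }

  int main() {
    // 64-bit LDR/STR: scale 8, unsigned scaled imm12, so 0..0xfff*8.
    assert(isLegalOffset(0xfff * 8, 8, 0, 0xfff * 8)); // 32760: max legal
    assert(!isLegalOffset(4, 8, 0, 0xfff * 8));        // not 8-aligned
    // 64-bit LDP/STP: scale 8, signed imm7, so -0x40*8 .. 0x3f*8.
    assert(isLegalOffset(-0x40 * 8, 8, -0x40 * 8, 0x3f * 8));
    assert(!isLegalOffset(0x40 * 8, 8, -0x40 * 8, 0x3f * 8)); // too big
    return 0;
  }
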
> -
> -unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
> - const MCInstrDesc &MCID = MI.getDesc();
> - const MachineBasicBlock &MBB = *MI.getParent();
> - const MachineFunction &MF = *MBB.getParent();
> - const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
> -
> - if (MCID.getSize())
> - return MCID.getSize();
> -
> - if (MI.getOpcode() == AArch64::INLINEASM)
> - return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
> -
> - switch (MI.getOpcode()) {
> - case TargetOpcode::BUNDLE:
> - return getInstBundleLength(MI);
> - case TargetOpcode::IMPLICIT_DEF:
> - case TargetOpcode::KILL:
> - case TargetOpcode::CFI_INSTRUCTION:
> - case TargetOpcode::EH_LABEL:
> - case TargetOpcode::GC_LABEL:
> - case TargetOpcode::DBG_VALUE:
> - case AArch64::TLSDESCCALL:
> - return 0;
> - default:
> - llvm_unreachable("Unknown instruction class");
> - }
> -}
> -
> -unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
> - unsigned Size = 0;
> - MachineBasicBlock::const_instr_iterator I = MI;
> - MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
> - while (++I != E && I->isInsideBundle()) {
> - assert(!I->isBundle() && "No nested bundle!");
> - Size += getInstSizeInBytes(*I);
> - }
> - return Size;
> -}
> -
> -bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
> - unsigned FrameReg, int &Offset,
> - const AArch64InstrInfo &TII) {
> - MachineBasicBlock &MBB = *MI.getParent();
> - MachineFunction &MF = *MBB.getParent();
> - MachineFrameInfo &MFI = *MF.getFrameInfo();
> -
> - MFI.getObjectOffset(FrameRegIdx);
> - llvm_unreachable("Unimplemented rewriteFrameIndex");
> -}
> -
> -void llvm::emitRegUpdate(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
> - int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
> - if (NumBytes == 0 && DstReg == SrcReg)
> - return;
> -
> - if (abs64(NumBytes) & ~0xffffff) {
> - // Generically, we have to materialize the offset into a temporary register
> - // and subtract it. There are a couple of ways this could be done; for now,
> - // we'll use a movz/movk or movn/movk sequence.
> - uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
> - .addImm(0xffff & Bits).addImm(0)
> - .setMIFlags(MIFlags);
> -
> - Bits >>= 16;
> - if (Bits & 0xffff) {
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
> - .addReg(ScratchReg)
> - .addImm(0xffff & Bits).addImm(1)
> - .setMIFlags(MIFlags);
> - }
> -
> - Bits >>= 16;
> - if (Bits & 0xffff) {
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
> - .addReg(ScratchReg)
> - .addImm(0xffff & Bits).addImm(2)
> - .setMIFlags(MIFlags);
> - }
> -
> - Bits >>= 16;
> - if (Bits & 0xffff) {
> - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
> - .addReg(ScratchReg)
> - .addImm(0xffff & Bits).addImm(3)
> - .setMIFlags(MIFlags);
> - }
> -
> - // ADD DST, SRC, xTMP (, lsl #0)
> - unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
> - BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
> - .addReg(SrcReg, RegState::Kill)
> - .addReg(ScratchReg, RegState::Kill)
> - .addImm(0)
> - .setMIFlag(MIFlags);
> - return;
> - }
> -
> - // Now we know that the adjustment can be done in at most two add/sub
> - // (immediate) instructions, which is always more efficient than a
> - // literal-pool load, or even a hypothetical movz/movk/add sequence.
> -
> - // Decide whether we're doing addition or subtraction.
> - unsigned LowOp, HighOp;
> - if (NumBytes >= 0) {
> - LowOp = AArch64::ADDxxi_lsl0_s;
> - HighOp = AArch64::ADDxxi_lsl12_s;
> - } else {
> - LowOp = AArch64::SUBxxi_lsl0_s;
> - HighOp = AArch64::SUBxxi_lsl12_s;
> - NumBytes = abs64(NumBytes);
> - }
> -
> - // If we're here, at the very least a move needs to be produced, which just
> - // happens to be materializable by an ADD.
> - if ((NumBytes & 0xfff) || NumBytes == 0) {
> - BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
> - .addReg(SrcReg, RegState::Kill)
> - .addImm(NumBytes & 0xfff)
> - .setMIFlag(MIFlags);
> -
> - // Next update should use the register we've just defined.
> - SrcReg = DstReg;
> - }
> -
> - if (NumBytes & 0xfff000) {
> - BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
> - .addReg(SrcReg, RegState::Kill)
> - .addImm(NumBytes >> 12)
> - .setMIFlag(MIFlags);
> - }
> -}
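
The "at most two add/sub (immediate)" claim follows from the A64 encoding:
each ADD/SUB takes a 12-bit immediate, optionally shifted left by 12, so any
adjustment below 2^24 splits into a low and a high part. A quick sanity
check of that split (plain C++, mirroring the masks used above):

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t NumBytes = 0x12345;                  // fits in 24 bits
    int64_t Low = NumBytes & 0xfff;              // 0x345 -> "lsl #0" op
    int64_t High = (NumBytes & 0xfff000) >> 12;  // 0x12  -> "lsl #12" op
    assert((High << 12) + Low == NumBytes);
    return 0;
  }
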
> -
> -void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned ScratchReg, int64_t NumBytes,
> - MachineInstr::MIFlag MIFlags) {
> - emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
> - NumBytes, MIFlags);
> -}
> -
> -
> -namespace {
> - struct LDTLSCleanup : public MachineFunctionPass {
> - static char ID;
> - LDTLSCleanup() : MachineFunctionPass(ID) {}
> -
> - bool runOnMachineFunction(MachineFunction &MF) override {
> - AArch64MachineFunctionInfo *MFI =
> - MF.getInfo<AArch64MachineFunctionInfo>();
> - if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
> - // No point folding accesses if there aren't at least two.
> - return false;
> - }
> -
> - MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
> - return VisitNode(DT->getRootNode(), 0);
> - }
> -
> - // Visit the dominator subtree rooted at Node in pre-order.
> - // If TLSBaseAddrReg is non-null, then use that to replace any
> - // TLS_base_addr instructions. Otherwise, create the register
> - // when the first such instruction is seen, and then use it
> - // as we encounter more instructions.
> - bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
> - MachineBasicBlock *BB = Node->getBlock();
> - bool Changed = false;
> -
> - // Traverse the current block.
> - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
> - ++I) {
> - switch (I->getOpcode()) {
> - case AArch64::TLSDESC_BLRx:
> - // Make sure it's a local dynamic access.
> - if (!I->getOperand(1).isSymbol() ||
> - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
> - break;
> -
> - if (TLSBaseAddrReg)
> - I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
> - else
> - I = SetRegister(I, &TLSBaseAddrReg);
> - Changed = true;
> - break;
> - default:
> - break;
> - }
> - }
> -
> - // Visit the children of this block in the dominator tree.
> - for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
> - I != E; ++I) {
> - Changed |= VisitNode(*I, TLSBaseAddrReg);
> - }
> -
> - return Changed;
> - }
> -
> - // Replace the TLS_base_addr instruction I with a copy from
> - // TLSBaseAddrReg, returning the new instruction.
> - MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
> - unsigned TLSBaseAddrReg) {
> - MachineFunction *MF = I->getParent()->getParent();
> - const AArch64TargetMachine *TM =
> - static_cast<const AArch64TargetMachine *>(&MF->getTarget());
> - const AArch64InstrInfo *TII = TM->getInstrInfo();
> -
> - // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
> - // code sequence assumes the address will be.
> - MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
> - TII->get(TargetOpcode::COPY),
> - AArch64::X0)
> - .addReg(TLSBaseAddrReg);
> -
> - // Erase the TLS_base_addr instruction.
> - I->eraseFromParent();
> -
> - return Copy;
> - }
> -
> - // Create a virtual register in *TLSBaseAddrReg, and populate it by
> - // inserting a copy instruction after I. Returns the new instruction.
> - MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
> - MachineFunction *MF = I->getParent()->getParent();
> - const AArch64TargetMachine *TM =
> - static_cast<const AArch64TargetMachine *>(&MF->getTarget());
> - const AArch64InstrInfo *TII = TM->getInstrInfo();
> -
> - // Create a virtual register for the TLS base address.
> - MachineRegisterInfo &RegInfo = MF->getRegInfo();
> - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
> -
> - // Insert a copy from X0 to TLSBaseAddrReg for later.
> - MachineInstr *Next = I->getNextNode();
> - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
> - TII->get(TargetOpcode::COPY),
> - *TLSBaseAddrReg)
> - .addReg(AArch64::X0);
> -
> - return Copy;
> - }
> -
> - const char *getPassName() const override {
> - return "Local Dynamic TLS Access Clean-up";
> - }
> -
> - void getAnalysisUsage(AnalysisUsage &AU) const override {
> - AU.setPreservesCFG();
> - AU.addRequired<MachineDominatorTree>();
> - MachineFunctionPass::getAnalysisUsage(AU);
> - }
> - };
> -}
> -
> -char LDTLSCleanup::ID = 0;
> -FunctionPass*
> -llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
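
For context, the source pattern this pass targets looks something like the
snippet below (illustrative only): with local-dynamic TLS, every access
needs the module's TLS base, and without the cleanup each one re-runs the
TLSDESC call sequence. The pass keeps the first call's result (the
_TLS_MODULE_BASE_ address) in a virtual register and turns later calls into
plain copies.

  static thread_local int A;
  static thread_local int B;

  // Two TLS accesses, but only one base-address computation is needed.
  int sum() { return A + B; }
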
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (removed)
> @@ -1,112 +0,0 @@
> -//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file contains the AArch64 implementation of the TargetInstrInfo class.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
> -#define LLVM_TARGET_AARCH64INSTRINFO_H
> -
> -#include "AArch64RegisterInfo.h"
> -#include "llvm/Target/TargetInstrInfo.h"
> -
> -#define GET_INSTRINFO_HEADER
> -#include "AArch64GenInstrInfo.inc"
> -
> -namespace llvm {
> -
> -class AArch64Subtarget;
> -
> -class AArch64InstrInfo : public AArch64GenInstrInfo {
> - const AArch64RegisterInfo RI;
> - const AArch64Subtarget &Subtarget;
> -public:
> - explicit AArch64InstrInfo(const AArch64Subtarget &STI);
> -
> - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
> - /// such, whenever a client has an instance of instruction info, it should
> - /// always be able to get register info as well (through this method).
> - ///
> - const TargetRegisterInfo &getRegisterInfo() const { return RI; }
> -
> - const AArch64Subtarget &getSubTarget() const { return Subtarget; }
> -
> - void copyPhysReg(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I, DebugLoc DL,
> - unsigned DestReg, unsigned SrcReg,
> - bool KillSrc) const override;
> - void CopyPhysRegTuple(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator I, DebugLoc DL,
> - unsigned DestReg, unsigned SrcReg) const;
> -
> - void storeRegToStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI,
> - unsigned SrcReg, bool isKill, int FrameIndex,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const override;
> - void loadRegFromStackSlot(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MBBI,
> - unsigned DestReg, int FrameIdx,
> - const TargetRegisterClass *RC,
> - const TargetRegisterInfo *TRI) const override;
> -
> - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
> - MachineBasicBlock *&FBB,
> - SmallVectorImpl<MachineOperand> &Cond,
> - bool AllowModify = false) const override;
> - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
> - MachineBasicBlock *FBB,
> - const SmallVectorImpl<MachineOperand> &Cond,
> - DebugLoc DL) const override;
> - unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
> - bool
> - ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
> -
> - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
> -
> - /// Look through the instructions in this function and work out the largest
> - /// the stack frame can be while maintaining the ability to address local
> - /// slots with no complexities.
> - unsigned estimateRSStackLimit(MachineFunction &MF) const;
> -
> - /// getAddressConstraints - For loads and stores (and PRFMs) taking an
> - /// immediate offset, this function determines the constraints required for
> - /// the immediate. It must satisfy:
> - /// + MinOffset <= imm <= MaxOffset
> - /// + imm % AccessScale == 0
> - void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
> - int &MinOffset, int &MaxOffset) const;
> -
> -
> - unsigned getInstSizeInBytes(const MachineInstr &MI) const;
> -
> - unsigned getInstBundleLength(const MachineInstr &MI) const;
> -
> -};
> -
> -bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
> - unsigned FrameReg, int &Offset,
> - const AArch64InstrInfo &TII);
> -
> -
> -void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
> - int64_t NumBytes,
> - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
> -
> -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
> - DebugLoc dl, const TargetInstrInfo &TII,
> - unsigned ScratchReg, int64_t NumBytes,
> - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
> -
> -}
> -
> -#endif
>
> Removed: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=209575&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (removed)
> @@ -1,5388 +0,0 @@
> -//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// This file describes the AArch64 scalar instructions in TableGen format.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 Instruction Predicate Definitions.
> -//
> -def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
> - AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
> -def HasNEON : Predicate<"Subtarget->hasNEON()">,
> - AssemblerPredicate<"FeatureNEON", "neon">;
> -def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
> - AssemblerPredicate<"FeatureCrypto","crypto">;
> -
> -// Use fused MAC if more precision in FP computation is allowed.
> -def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
> - " FPOpFusion::Fast)">;
> -include "AArch64InstrFormats.td"
> -
> -//===----------------------------------------------------------------------===//
> -// AArch64 specific pattern fragments.
> -//
> -// An 'fmul' node with a single use.
> -def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
> - return N->hasOneUse();
> -}]>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Target-specific ISD nodes and profiles
> -//===----------------------------------------------------------------------===//
> -
> -def SDT_A64ret : SDTypeProfile<0, 0, []>;
> -def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
> - SDNPOptInGlue,
> - SDNPVariadic]>;
> -
> -// (ins NZCV, Condition, Dest)
> -def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
> -def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
> -
> -// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
> -def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
> - SDTCisSameAs<0, 2>,
> - SDTCisSameAs<2, 3>]>;
> -def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
> -
> -// (outs NZCV), (ins LHS, RHS, Condition)
> -def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
> - SDTCisSameAs<1, 2>]>;
> -def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
> -
> -
> -// (outs GPR64), (ins)
> -def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
> -
> -// A64 compares don't really care about the condition (they set all flags), so
> -// a simple binary operator is useful.
> -def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64setcc node:$lhs, node:$rhs, cond)>;
> -
> -
> -// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
> -// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
> -// and V flags can be set differently by this operation. It comes down to
> -// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
> -// then everything is fine. If not then the optimization is wrong. Thus general
> -// comparisons are only valid if op2 != 0.
> -
> -// So, finally, the only LLVM-native comparisons that don't mention C and V are
> -// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
> -// absence of information about op2.
> -def equality_cond : PatLeaf<(cond), [{
> - return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
> -}]>;
> -
> -def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
> -
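
A standalone illustration of the caveat above (plain C++ on 32-bit values):
"cmp a, neg(b)" and "cmn a, b" always agree on the result bits, hence on Z
and N and on equality, but negation wraps at INT_MIN, which is where C and V
can diverge.

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t a = 0, b = 0x80000000u; // b has INT32_MIN's bit pattern
    uint32_t negb = 0u - b;          // two's-complement negation wraps...
    assert(negb == b);               // ...so -INT_MIN == INT_MIN here
    // The subtraction and addition still produce identical result bits,
    // which is why SETEQ/SETNE remain safe to select to CMN.
    assert(a - negb == a + b);
    return 0;
  }
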
> -// There are two layers of indirection here, driven by the following
> -// considerations.
> -// + TableGen does not know CodeModel or Reloc, so that decision should be
> -// made for a variable/address at ISelLowering.
> -// + The output of ISelLowering should be selectable (hence the Wrapper,
> -// rather than a bare target opcode)
> -def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
> - SDTCisSameAs<0, 2>,
> - SDTCisSameAs<0, 3>,
> - SDTCisSameAs<0, 4>,
> - SDTCisPtrTy<0>]>;
> -
> -def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>;
> -
> -def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
> - SDTCisSameAs<1, 2>,
> - SDTCisVT<3, i32>,
> - SDTCisPtrTy<0>]>;
> -
> -def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>;
> -
> -
> -def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
> -def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
> - [SDNPHasChain]>;
> -
> -
> -// (A64BFI LHS, RHS, LSB, Width)
> -def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
> - SDTCisSameAs<1, 2>,
> - SDTCisVT<3, i64>,
> - SDTCisVT<4, i64>]>;
> -
> -def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
> -
> -// (A64EXTR HiReg, LoReg, LSB)
> -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
> - SDTCisVT<3, i64>]>;
> -def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
> -
> -// (A64[SU]BFX Field, ImmR, ImmS).
> -//
> -// Note that ImmR and ImmS are already encoded for the actual instructions. The
> -// more natural LSB and Width mix together to form ImmR and ImmS, something
> -// which TableGen can't handle.
> -def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
> -def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
> -
> -def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
> -
> -class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
> -
> -//===----------------------------------------------------------------------===//
> -// Call sequence pseudo-instructions
> -//===----------------------------------------------------------------------===//
> -
> -
> -def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
> -def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
> - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
> -
> -def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
> - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
> -
> -// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
> -// destination but needs a relocation against a fixed symbol. As such it has two
> -// fixed operands: the callee and the relocated variable.
> -//
> -// The TLS ABI only allows it to be selected to a BLR instruction (with
> -// appropriate relocation).
> -def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
> -
> -def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
> - [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
> - SDNPVariadic]>;
> -
> -
> -def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
> -def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
> - [SDNPHasChain, SDNPOutGlue]>;
> -
> -def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
> -def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
> - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
> -
> -
> -
> -// These pseudo-instructions have special semantics by virtue of being passed to
> -// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
> -// LowerCall to (in our case) tell the back-end about stack adjustments for
> -// arguments passed on the stack. Here we select those markers to
> -// pseudo-instructions which explicitly set the stack, and finally in the
> -// RegisterInfo we convert them to a true stack adjustment.
> -let Defs = [XSP], Uses = [XSP] in {
> - def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
> - [(AArch64callseq_start timm:$amt)]>;
> -
> - def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
> - [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Atomic operation pseudo-instructions
> -//===----------------------------------------------------------------------===//
> -
> -// These get selected from C++ code as a pretty much direct translation from the
> -// generic DAG nodes. The one exception is that the AtomicOrdering is added
> -// as an operand so that the eventual lowering can make use of it and choose
> -// acquire/release operations when required.
> -
> -let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
> -multiclass AtomicSizes {
> - def _I8 : PseudoInst<(outs GPR32:$dst),
> - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
> - def _I16 : PseudoInst<(outs GPR32:$dst),
> - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
> - def _I32 : PseudoInst<(outs GPR32:$dst),
> - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
> - def _I64 : PseudoInst<(outs GPR64:$dst),
> - (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
> -}
> -}
> -
> -defm ATOMIC_LOAD_ADD : AtomicSizes;
> -defm ATOMIC_LOAD_SUB : AtomicSizes;
> -defm ATOMIC_LOAD_AND : AtomicSizes;
> -defm ATOMIC_LOAD_OR : AtomicSizes;
> -defm ATOMIC_LOAD_XOR : AtomicSizes;
> -defm ATOMIC_LOAD_NAND : AtomicSizes;
> -defm ATOMIC_SWAP : AtomicSizes;
> -let Defs = [NZCV] in {
> - // These operations need a CMP to calculate the correct value
> - defm ATOMIC_LOAD_MIN : AtomicSizes;
> - defm ATOMIC_LOAD_MAX : AtomicSizes;
> - defm ATOMIC_LOAD_UMIN : AtomicSizes;
> - defm ATOMIC_LOAD_UMAX : AtomicSizes;
> -}
> -
> -class AtomicCmpSwap<RegisterClass GPRData>
> - : PseudoInst<(outs GPRData:$dst),
> - (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
> - i32imm:$ordering), []> {
> - let usesCustomInserter = 1;
> - let hasCtrlDep = 1;
> - let mayLoad = 1;
> - let mayStore = 1;
> - let Defs = [NZCV];
> -}
> -
> -def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
> -def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
> -def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
> -def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
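
Source-level operations like the one below (illustrative only) are what end
up as these pseudo-instructions; the memory order travels along as the extra
i32imm:$ordering operand so the custom inserter can pick acquire/release
variants of the exclusive load/store loop.

  #include <atomic>

  int bump(std::atomic<int> &Counter) {
    // Plausibly selects to ATOMIC_LOAD_ADD_I32 with an acquire-release
    // ordering operand on this backend.
    return Counter.fetch_add(1, std::memory_order_acq_rel);
  }
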
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (extended register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
> -
> -// The RHS of these operations is conceptually a sign/zero-extended
> -// register, optionally shifted left by 1-4. The extension can be a
> -// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
> -// must be specified with one exception:
> -
> -// If one of the registers is sp/wsp then LSL is an alias for UXTW in
> -// 32-bit instructions and UXTX in 64-bit versions, the shift amount
> -// is not optional in that case (but can explicitly be 0), and the
> -// entire suffix can be skipped (e.g. "add sp, x3, x2").
> -
> -multiclass extend_operands<string PREFIX, string Diag> {
> - def _asmoperand : AsmOperandClass {
> - let Name = PREFIX;
> - let RenderMethod = "addRegExtendOperands";
> - let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
> - let DiagnosticType = "AddSubRegExtend" # Diag;
> - }
> -
> - def _operand : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
> - let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
> - let DecoderMethod = "DecodeRegExtendOperand";
> - let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
> - }
> -}
> -
> -defm UXTB : extend_operands<"UXTB", "Small">;
> -defm UXTH : extend_operands<"UXTH", "Small">;
> -defm UXTW : extend_operands<"UXTW", "Small">;
> -defm UXTX : extend_operands<"UXTX", "Large">;
> -defm SXTB : extend_operands<"SXTB", "Small">;
> -defm SXTH : extend_operands<"SXTH", "Small">;
> -defm SXTW : extend_operands<"SXTW", "Small">;
> -defm SXTX : extend_operands<"SXTX", "Large">;
> -
> -def LSL_extasmoperand : AsmOperandClass {
> - let Name = "RegExtendLSL";
> - let RenderMethod = "addRegExtendOperands";
> - let DiagnosticType = "AddSubRegExtendLarge";
> -}
> -
> -def LSL_extoperand : Operand<i64> {
> - let ParserMatchClass = LSL_extasmoperand;
> -}
> -
> -
> -// The patterns for various sign-extensions are a little ugly and
> -// non-uniform because everything has already been promoted to the
> -// legal i64 and i32 types. We'll wrap the various variants up in a
> -// class for use later.
> -class extend_types {
> - dag uxtb; dag uxth; dag uxtw; dag uxtx;
> - dag sxtb; dag sxth; dag sxtw; dag sxtx;
> - ValueType ty;
> - RegisterClass GPR;
> -}
> -
> -def extends_to_i64 : extend_types {
> - let uxtb = (and (anyext i32:$Rm), 255);
> - let uxth = (and (anyext i32:$Rm), 65535);
> - let uxtw = (zext i32:$Rm);
> - let uxtx = (i64 $Rm);
> -
> - let sxtb = (sext_inreg (anyext i32:$Rm), i8);
> - let sxth = (sext_inreg (anyext i32:$Rm), i16);
> - let sxtw = (sext i32:$Rm);
> - let sxtx = (i64 $Rm);
> -
> - let ty = i64;
> - let GPR = GPR64xsp;
> -}
> -
> -
> -def extends_to_i32 : extend_types {
> - let uxtb = (and i32:$Rm, 255);
> - let uxth = (and i32:$Rm, 65535);
> - let uxtw = (i32 i32:$Rm);
> - let uxtx = (i32 i32:$Rm);
> -
> - let sxtb = (sext_inreg i32:$Rm, i8);
> - let sxth = (sext_inreg i32:$Rm, i16);
> - let sxtw = (i32 i32:$Rm);
> - let sxtx = (i32 i32:$Rm);
> -
> - let ty = i32;
> - let GPR = GPR32wsp;
> -}
> -
> -// Now, six of the extensions supported are easy and uniform: if the source size
> -// is 32 bits or less, then Rm is always a 32-bit register. We'll instantiate
> -// those instructions in one block.
> -
> -// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
> -// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
> -// be impossible.
> -// + Patterns are very different as well.
> -// + Passing different registers would be ugly (more fields in extend_types
> -// would probably be the best option).
> -multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
> - SDPatternOperator opfrag,
> - dag outs, extend_types exts> {
> - def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
> - outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -// These two could be merged in with the above, but their patterns aren't really
> -// necessary and the naming scheme would necessarily break:
> -multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
> - dag outs> {
> - def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
> - outs,
> - (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
> - outs,
> - (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [/* No Pattern: same as uxtx */],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
> - def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
> - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [/* No pattern: probably same as uxtw */],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
> - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
> - !strconcat(asmop, "$Rn, $Rm, $Imm3"),
> - [/* No Pattern: probably same as uxtw */],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -class SetRD<RegisterClass RC, SDPatternOperator op>
> - : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
> -class SetNZCV<SDPatternOperator op>
> - : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
> -
> -defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
> - (outs GPR64xsp:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
> - (outs GPR64xsp:$Rd)>;
> -defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
> - (outs GPR32wsp:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
> - (outs GPR32wsp:$Rd)>;
> -defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
> - (outs GPR64xsp:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
> - (outs GPR64xsp:$Rd)>;
> -defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
> - (outs GPR32wsp:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
> - (outs GPR32wsp:$Rd)>;
> -
> -let Defs = [NZCV] in {
> -defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
> - (outs GPR64:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
> - (outs GPR64:$Rd)>;
> -defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
> - (outs GPR32:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
> - (outs GPR32:$Rd)>;
> -defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
> - (outs GPR64:$Rd), extends_to_i64>,
> - addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
> - (outs GPR64:$Rd)>;
> -defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
> - (outs GPR32:$Rd), extends_to_i32>,
> - addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
> - (outs GPR32:$Rd)>;
> -
> -
> -let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in {
> -defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
> - (outs), extends_to_i64>,
> - addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
> -defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
> - (outs), extends_to_i32>,
> - addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
> -defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
> - (outs), extends_to_i64>,
> - addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
> -defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
> - (outs), extends_to_i32>,
> - addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
> -}
> -}
> -
> -// Now patterns for the operation without a shift being needed. No patterns are
> -// created for uxtx/sxtx since they're non-uniform and it's expected that
> -// add/sub (shifted register) will handle those cases anyway.
> -multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
> - extend_types exts> {
> - def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
> - (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
> - (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
> - (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
> -
> - def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
> - (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
> - (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
> - def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
> - (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
> -}
> -
> -defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
> -defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
> -
> -defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
> -defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
> -defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
> -
> -// An extend written as "lsl #imm" is valid if and only if one of Rn and Rd is
> -// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
> -// operation. Also permitted in this case is complete omission of the argument,
> -// which implies "lsl #0".
> -multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
> - RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
> - def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
> - (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
> -
> - def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
> - (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL),
> - 0>;
> -
> -}
> -
> -defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
> -defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
> -defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
> -defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
> -defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
> -defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
> -defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
> -defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
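
For reference, the surface syntax these aliases covered looks like this
(registers illustrative, from memory):

    add x0, sp, x1            // parsed as add x0, sp, x1, uxtx #0
    add sp, sp, x1, lsl #4    // "lsl" is only accepted because sp is involved
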
> -
> -// Rd cannot be sp for flag-setting variants so only half of the aliases are
> -// needed.
> -defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
> -defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
> -defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
> -defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
> -
> -// CMP unfortunately has to be different because the instruction doesn't have a
> -// dest register.
> -multiclass cmp_lsl_aliases<string asmop, Instruction inst,
> - RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
> - def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
> - (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
> -
> - def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
> - (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
> -}
> -
> -defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
> -defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
> -defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
> -defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
> -
> -// These instructions accept a 12-bit unsigned immediate, optionally shifted
> -// left by 12 bits. The official assembly format specifies a 12-bit immediate with
> -// one of "", "LSL #0", "LSL #12" supplementary operands.
> -
> -// There are surprisingly few ways to make this work with TableGen, so this
> -// implementation has separate instructions for the "LSL #0" and "LSL #12"
> -// variants.
> -
> -// If the MCInst retained a single combined immediate (which could be 0x123000,
> -// for example) then both components (imm & shift) would have to be delegated to
> -// a single assembly operand. This would entail a separate operand parser
> -// (because the LSL would have to live in the same AArch64Operand as the
> -// immediate to be accessible); assembly parsing is rather complex and
> -// error-prone C++ code.
> -//
> -// By splitting the immediate, we can delegate handling this optional operand to
> -// an InstAlias. Supporting functions to generate the correct MCInst are still
> -// required, but these are essentially trivial and parsing can remain generic.
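
To make the split concrete: an immediate such as 0x123000 only fits the
shifted form, so (operands illustrative):

    add x0, x1, #0x123, lsl #12    // MCInst stores imm12 = 0x123 plus the LSL12 operand
    add x0, x1, #0x123             // the plain LSL0 form
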
> -//
> -// Rejected plans with rationale:
> -// ------------------------------
> -//
> -// In an ideal world you'd have two first-class immediate operands (in
> -// InOperandList, specifying imm12 and shift). Unfortunately this is not
> -// selectable by any means I could discover.
> -//
> -// An Instruction with two MCOperands hidden behind a single entry in
> -// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
> -// but required more C++ code to handle encoding/decoding. Parsing (the intended
> -// main beneficiary) ended up equally complex because of the optional nature of
> -// "LSL #0".
> -//
> -// Attempting to circumvent the need for a custom OperandParser above by giving
> -// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
> -// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
> -// should be parsed: there was no way to accommodate an "lsl #12".
> -
> -let ParserMethod = "ParseImmWithLSLOperand",
> - RenderMethod = "addImmWithLSLOperands" in {
> -  // Derived PredicateMethod fields are different for each operand class.
> - def addsubimm_lsl0_asmoperand : AsmOperandClass {
> - let Name = "AddSubImmLSL0";
> -    // If an error is reported against this operand, the instruction could
> -    // also be a register variant.
> - let DiagnosticType = "AddSubSecondSource";
> - }
> -
> - def addsubimm_lsl12_asmoperand : AsmOperandClass {
> - let Name = "AddSubImmLSL12";
> - let DiagnosticType = "AddSubSecondSource";
> - }
> -}
> -
> -def shr_12_XFORM : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
> -}]>;
> -
> -def shr_12_neg_XFORM : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
> -}]>;
> -
> -def neg_XFORM : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
> -}]>;
> -
> -
> -multiclass addsub_imm_operands<ValueType ty> {
> - let PrintMethod = "printAddSubImmLSL0Operand",
> - EncoderMethod = "getAddSubImmOpValue",
> - ParserMatchClass = addsubimm_lsl0_asmoperand in {
> - def _posimm_lsl0 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
> - def _negimm_lsl0 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
> - neg_XFORM>;
> - }
> -
> - let PrintMethod = "printAddSubImmLSL12Operand",
> - EncoderMethod = "getAddSubImmOpValue",
> - ParserMatchClass = addsubimm_lsl12_asmoperand in {
> - def _posimm_lsl12 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
> - shr_12_XFORM>;
> -
> - def _negimm_lsl12 : Operand<ty>,
> - ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
> - shr_12_neg_XFORM>;
> - }
> -}
> -
> -// The add operands don't need any transformation
> -defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
> -defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
> -
> -multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
> - string asmop, string cmpasmop,
> - Operand imm_operand, Operand cmp_imm_operand,
> - RegisterClass GPR, RegisterClass GPRsp,
> - AArch64Reg ZR, ValueType Ty> {
> - // All registers for non-S variants allow SP
> - def _s : A64I_addsubimm<sf, op, 0b0, shift,
> - (outs GPRsp:$Rd),
> - (ins GPRsp:$Rn, imm_operand:$Imm12),
> - !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
> - [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> -
> - // S variants can read SP but would write to ZR
> - def _S : A64I_addsubimm<sf, op, 0b1, shift,
> - (outs GPR:$Rd),
> - (ins GPRsp:$Rn, imm_operand:$Imm12),
> - !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
> - [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let Defs = [NZCV];
> - }
> -
> -  // Note that the pattern here for ADDS is subtle. Canonically CMP
> -  // a, b becomes SUBS a, b. If b is a negative immediate, this is
> -  // equivalent to ADDS a, (-b). That equivalence does not hold in
> -  // general (for b == 0, for instance, the carry flag comes out
> -  // differently), but the negimm operands below only match strictly
> -  // negative, encodable immediates, where it is sound.
> - def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
> - (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
> - !strconcat(cmpasmop, " $Rn, $Imm12"),
> - [(set NZCV,
> - (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP]> {
> - let Rd = 0b11111;
> - let Defs = [NZCV];
> - let isCompare = 1;
> - }
> -}
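
Concretely, a compare against a negative immediate should come out as the
cmn form, e.g. (registers illustrative):

    cmn w0, #5    // i.e. adds wzr, w0, #5; #-5 itself is not encodable in imm12
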
> -
> -
> -multiclass addsubimm_shifts<string prefix, bit sf, bit op,
> - string asmop, string cmpasmop, string operand, string cmpoperand,
> - RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
> - ValueType Ty> {
> - defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
> - asmop, cmpasmop,
> - !cast<Operand>(operand # "_lsl0"),
> - !cast<Operand>(cmpoperand # "_lsl0"),
> - GPR, GPRsp, ZR, Ty>;
> -
> - defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
> - asmop, cmpasmop,
> - !cast<Operand>(operand # "_lsl12"),
> - !cast<Operand>(cmpoperand # "_lsl12"),
> - GPR, GPRsp, ZR, Ty>;
> -}
> -
> -defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
> - "addsubimm_operand_i32_posimm",
> - "addsubimm_operand_i32_negimm",
> - GPR32, GPR32wsp, WZR, i32>;
> -defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
> - "addsubimm_operand_i64_posimm",
> - "addsubimm_operand_i64_negimm",
> - GPR64, GPR64xsp, XZR, i64>;
> -defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
> - "addsubimm_operand_i32_negimm",
> - "addsubimm_operand_i32_posimm",
> - GPR32, GPR32wsp, WZR, i32>;
> -defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
> - "addsubimm_operand_i64_negimm",
> - "addsubimm_operand_i64_posimm",
> - GPR64, GPR64xsp, XZR, i64>;
> -
> -multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
> - def _fromsp : InstAlias<"mov $Rd, $Rn",
> - (addop GPRsp:$Rd, SP:$Rn, 0),
> - 0b1>;
> -
> - def _tosp : InstAlias<"mov $Rd, $Rn",
> - (addop SP:$Rd, GPRsp:$Rn, 0),
> - 0b1>;
> -}
> -
> -// Recall Rxsp is a RegisterClass containing *just* xsp.
> -defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
> -defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
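
These are the usual sp moves, i.e.:

    mov sp, x0    // really add sp, x0, #0
    mov x0, sp    // really add x0, sp, #0
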
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (shifted register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
> -
> -//===-------------------------------
> -// 1. The "shifted register" operands. Shared with logical insts.
> -//===-------------------------------
> -
> -multiclass shift_operands<string prefix, string form> {
> - def _asmoperand_i32 : AsmOperandClass {
> - let Name = "Shift" # form # "i32";
> - let RenderMethod = "addShiftOperands";
> - let PredicateMethod = "isShift<A64SE::" # form # ", false>";
> - let DiagnosticType = "AddSubRegShift32";
> - }
> -
> - // Note that the operand type is intentionally i64 because the DAGCombiner
> - // puts these into a canonical form.
> - def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
> - let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
> - let DecoderMethod = "Decode32BitShiftOperand";
> - }
> -
> - def _asmoperand_i64 : AsmOperandClass {
> - let Name = "Shift" # form # "i64";
> - let RenderMethod = "addShiftOperands";
> - let PredicateMethod = "isShift<A64SE::" # form # ", true>";
> - let DiagnosticType = "AddSubRegShift64";
> - }
> -
> - def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
> - let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
> - }
> -}
> -
> -defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
> -defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
> -defm asr_operand : shift_operands<"asr_operand", "ASR">;
> -
> -// Not used for add/sub, but defined here for completeness. The "logical
> -// (shifted register)" instructions *do* have an ROR variant.
> -defm ror_operand : shift_operands<"ror_operand", "ROR">;
> -
> -//===-------------------------------
> -// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
> -//===-------------------------------
> -
> -// N.b. the commutable parameter is just !N. It will be first against the wall
> -// when the revolution comes.
> -multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
> - string asmop, SDPatternOperator opfrag, ValueType ty,
> - RegisterClass GPR, list<Register> defs> {
> - let isCommutable = commutable, Defs = defs in {
> - def _lsl : A64I_addsubshift<sf, op, s, 0b00,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def _lsr : A64I_addsubshift<sf, op, s, 0b01,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def _asr : A64I_addsubshift<sf, op, s, 0b10,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> - }
> -
> - def _noshift
> - : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
> - GPR:$Rm, 0)>;
> -
> - def : Pat<(opfrag ty:$Rn, ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
> -
> -multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
> - string asmop, SDPatternOperator opfrag,
> - list<Register> defs> {
> - defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
> - commutable, asmop, opfrag, i64, GPR64, defs>;
> - defm www : addsub_shifts<prefix # "www", 0b0, op, s,
> - commutable, asmop, opfrag, i32, GPR32, defs>;
> -}
> -
> -
> -defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
> -defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
> -
> -defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
> -defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
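
Typical selections from these patterns, for reference (registers
illustrative):

    add x0, x1, x2, lsl #3     // ADDxxx_lsl: x0 = x1 + (x2 << 3)
    subs w0, w1, w2, asr #7    // SUBSwww_asr, also setting NZCV
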
> -
> -//===-------------------------------
> -// 3. The NEG/NEGS aliases
> -//===-------------------------------
> -
> -multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
> - ValueType ty, Operand shift_operand, SDNode shiftop> {
> - def : InstAlias<"neg $Rd, $Rm, $Imm6",
> - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
> -
> - def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
> - (INST ZR, $Rm, shift_operand:$Imm6)>;
> -}
> -
> -defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
> -defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
> -defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
> -def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
> -def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
> -
> -defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
> -defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
> -defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
> -def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
> -def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
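
So a (sub 0, x) at the DAG level folds straight into the alias:

    neg x0, x1            // i.e. sub x0, xzr, x1
    neg w0, w1, lsl #2    // i.e. sub w0, wzr, w1, lsl #2
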
> -
> -// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
> -// be involved.
> -class negs_alias<Instruction INST, RegisterClass GPR,
> - Register ZR, Operand shift_operand, SDNode shiftop>
> - : InstAlias<"negs $Rd, $Rm, $Imm6",
> - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
> -
> -def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
> -def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
> -def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
> -def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
> -
> -def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
> -def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
> -def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
> -def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
> -
> -//===-------------------------------
> -// 4. The CMP/CMN aliases
> -//===-------------------------------
> -
> -multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
> - string asmop, SDPatternOperator opfrag, ValueType ty,
> - RegisterClass GPR> {
> - let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
> - def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
> - [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> -
> - def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
> - [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> -
> - def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
> - [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> - }
> -
> - def _noshift
> - : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
> -
> - def : Pat<(opfrag ty:$Rn, ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
> -
> -defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
> -defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
> -
> -defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
> -defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
> -
> -//===----------------------------------------------------------------------===//
> -// Add-subtract (with carry) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
> -
> -multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
> - let Uses = [NZCV] in {
> - def www : A64I_addsubcarry<0b0, op, s, 0b000000,
> - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
> - (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -}
> -
> -let isCommutable = 1 in {
> - defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
> -}
> -
> -defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
> -
> -let Defs = [NZCV] in {
> - let isCommutable = 1 in {
> - defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
> - }
> -
> - defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
> -}
> -
> -def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
> -def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
> -def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
> -def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
> -
> -// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
> -// addition). So the flag-setting instructions are appropriate.
> -def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
> -def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
> -def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
> -def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Bitfield
> -//===----------------------------------------------------------------------===//
> -// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
> -// UBFIZ, UBFX
> -
> -// Because of the rather complicated nearly-overlapping aliases, the decoding of
> -// this range of instructions is handled manually. The architectural
> -// instructions are BFM, SBFM and UBFM but a disassembler should never produce
> -// these.
> -//
> -// In the end, the best option was to use BFM instructions for decoding under
> -// almost all circumstances, but to create aliasing *Instructions* for each of
> -// the canonical forms and specify a completely custom decoder which would
> -// substitute the correct MCInst as needed.
> -//
> -// This also simplifies instruction selection, parsing, etc., because the MCInsts
> -// have a shape that's closer to their use in code.
> -
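
A worked example of the aliasing being described, if it helps:

    lsr w0, w1, #3    // architecturally ubfm w0, w1, #3, #31

The custom decoder is what lets the disassembler hand back the LSR form
directly instead of a raw UBFM.
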
> -//===-------------------------------
> -// 1. The architectural BFM instructions
> -//===-------------------------------
> -
> -def uimm5_asmoperand : AsmOperandClass {
> - let Name = "UImm5";
> - let PredicateMethod = "isUImm<5>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm5";
> -}
> -
> -def uimm6_asmoperand : AsmOperandClass {
> - let Name = "UImm6";
> - let PredicateMethod = "isUImm<6>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm6";
> -}
> -
> -def bitfield32_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
> - let ParserMatchClass = uimm5_asmoperand;
> -
> - let DecoderMethod = "DecodeBitfield32ImmOperand";
> -}
> -
> -
> -def bitfield64_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
> - let ParserMatchClass = uimm6_asmoperand;
> -
> - // Default decoder works in 64-bit case: the 6-bit field can take any value.
> -}
> -
> -multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
> - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - }
> -
> - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - }
> -}
> -
> -defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
> -defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
> -
> -// BFM instructions modify the destination register rather than defining it
> -// completely.
> -def BFMwwii :
> - A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
> - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - let Constraints = "$src = $Rd";
> -}
> -
> -def BFMxxii :
> - A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
> - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - let DecoderMethod = "DecodeBitfieldInstruction";
> - let Constraints = "$src = $Rd";
> -}
> -
> -
> -//===-------------------------------
> -// 2. Extend aliases to 64-bit dest
> -//===-------------------------------
> -
> -// Unfortunately the extensions that end up as 64 bits cannot be handled by an
> -// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
> -// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is
> -// not capable of such a mapping as far as I'm aware.
> -
> -// Note that these instructions are strictly more specific than the
> -// BFM ones (in ImmR) so they can handle their own decoding.
> -class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
> - string asmop, bits<6> imms, dag pattern>
> - : A64I_bitfield<sf, opc, sf,
> - (outs GPRDest:$Rd), (ins GPR32:$Rn),
> - !strconcat(asmop, "\t$Rd, $Rn"),
> - [(set dty:$Rd, pattern)], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let ImmR = 0b000000;
> - let ImmS = imms;
> -}
> -
> -// Signed extensions
> -def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
> - (sext_inreg (anyext i32:$Rn), i8)>;
> -def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
> - (sext_inreg i32:$Rn, i8)>;
> -def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
> - (sext_inreg (anyext i32:$Rn), i16)>;
> -def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
> - (sext_inreg i32:$Rn, i16)>;
> -def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
> -
> -// Unsigned extensions
> -def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
> - (and i32:$Rn, 255)>;
> -def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
> - (and i32:$Rn, 65535)>;
> -
> -// The 64-bit unsigned variants are not strictly architectural but recommended
> -// for consistency.
> -let isAsmParserOnly = 1 in {
> - def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
> - (and (anyext i32:$Rn), 255)>;
> - def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
> - (and (anyext i32:$Rn), 65535)>;
> -}
> -
> -// Extra patterns for when the source register is actually 64-bits
> -// too. There's no architectural difference here, it's just LLVM
> -// shenanigans. There's no need for equivalent zero-extension patterns
> -// because they'll already be caught by logical (immediate) matching.
> -def : Pat<(sext_inreg i64:$Rn, i8),
> - (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
> -def : Pat<(sext_inreg i64:$Rn, i16),
> - (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
> -def : Pat<(sext_inreg i64:$Rn, i32),
> - (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
> -
> -
> -//===-------------------------------
> -// 3. Aliases for ASR and LSR (the simple shifts)
> -//===-------------------------------
> -
> -// These also handle their own decoding because ImmS being set makes
> -// them take precedence over BFM.
> -multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
> - def wwi : A64I_bitfield<0b0, opc, 0b0,
> - (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
> - [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let ImmS = 31;
> - }
> -
> - def xxi : A64I_bitfield<0b1, opc, 0b1,
> - (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
> - [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - let ImmS = 63;
> - }
> -
> -}
> -
> -defm ASR : A64I_shift<0b00, "asr", sra>;
> -defm LSR : A64I_shift<0b10, "lsr", srl>;
> -
> -//===-------------------------------
> -// 4. Aliases for LSL
> -//===-------------------------------
> -
> -// Unfortunately LSL and subsequent aliases are much more complicated. We need
> -// to be able to say that certain output instruction fields depend in a complex
> -// manner on combinations of input assembly fields.
> -//
> -// MIOperandInfo *might* have been able to do it, but at the cost of
> -// significantly more C++ code.
> -
> -// N.b. contrary to usual practice these operands store the shift rather than
> -// the machine bits in an MCInst. The complexity overhead of consistency
> -// outweighed the benefits in this case (custom asmparser, printer and selection
> -// vs custom encoder).
> -def bitfield32_lsl_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
> - let ParserMatchClass = uimm5_asmoperand;
> - let EncoderMethod = "getBitfield32LSLOpValue";
> -}
> -
> -def bitfield64_lsl_imm : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
> - let ParserMatchClass = uimm6_asmoperand;
> - let EncoderMethod = "getBitfield64LSLOpValue";
> -}
> -
> -class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
> - Operand operand>
> - : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
> - "lsl\t$Rd, $Rn, $FullImm",
> - [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - bits<12> FullImm;
> - let ImmR = FullImm{5-0};
> - let ImmS = FullImm{11-6};
> -
> - // No disassembler allowed because it would overlap with BFM which does the
> - // actual work.
> - let isAsmParserOnly = 1;
> -}
> -
> -def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
> -def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
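
A worked encoding, to make the FullImm split concrete (assuming I have the
formula right):

    lsl x0, x1, #3    // i.e. ubfm x0, x1, #61, #60: ImmR = (64 - 3) mod 64, ImmS = 63 - 3
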
> -
> -//===-------------------------------
> -// 5. Aliases for bitfield extract instructions
> -//===-------------------------------
> -
> -def bfx32_width_asmoperand : AsmOperandClass {
> - let Name = "BFX32Width";
> - let PredicateMethod = "isBitfieldWidth<32>";
> - let RenderMethod = "addBFXWidthOperands";
> - let DiagnosticType = "Width32";
> -}
> -
> -def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
> - let PrintMethod = "printBFXWidthOperand";
> - let ParserMatchClass = bfx32_width_asmoperand;
> -}
> -
> -def bfx64_width_asmoperand : AsmOperandClass {
> - let Name = "BFX64Width";
> - let PredicateMethod = "isBitfieldWidth<64>";
> - let RenderMethod = "addBFXWidthOperands";
> - let DiagnosticType = "Width64";
> -}
> -
> -def bfx64_width : Operand<i64> {
> - let PrintMethod = "printBFXWidthOperand";
> - let ParserMatchClass = bfx64_width_asmoperand;
> -}
> -
> -
> -multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
> - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -
> - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -}
> -
> -defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
> -defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
> -
> -// Again, variants based on BFM modify Rd so need it as an input too.
> -def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
> - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
> - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -// SBFX instructions can do a 1-instruction sign-extension of boolean values.
> -def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
> -def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
> -def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
> - (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
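
In assembly terms the i32 pattern there is just, I believe:

    sbfx w0, w0, #0, #1    // replicate bit 0 across the whole register
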
> -
> -// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
> -// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
> -def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
> - sub_32)>;
> -
> -//===-------------------------------
> -// 6. Aliases for bitfield insert instructions
> -//===-------------------------------
> -
> -def bfi32_lsb_asmoperand : AsmOperandClass {
> - let Name = "BFI32LSB";
> - let PredicateMethod = "isUImm<5>";
> - let RenderMethod = "addBFILSBOperands<32>";
> - let DiagnosticType = "UImm5";
> -}
> -
> -def bfi32_lsb : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
> - let PrintMethod = "printBFILSBOperand<32>";
> - let ParserMatchClass = bfi32_lsb_asmoperand;
> -}
> -
> -def bfi64_lsb_asmoperand : AsmOperandClass {
> - let Name = "BFI64LSB";
> - let PredicateMethod = "isUImm<6>";
> - let RenderMethod = "addBFILSBOperands<64>";
> - let DiagnosticType = "UImm6";
> -}
> -
> -def bfi64_lsb : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
> - let PrintMethod = "printBFILSBOperand<64>";
> - let ParserMatchClass = bfi64_lsb_asmoperand;
> -}
> -
> -// Width verification is performed during conversion so the width operand can be
> -// shared between the 32/64-bit cases. It is still needed for the print method,
> -// though, because ImmS encodes "width - 1".
> -def bfi32_width_asmoperand : AsmOperandClass {
> - let Name = "BFI32Width";
> - let PredicateMethod = "isBitfieldWidth<32>";
> - let RenderMethod = "addBFIWidthOperands";
> - let DiagnosticType = "Width32";
> -}
> -
> -def bfi32_width : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
> - let PrintMethod = "printBFIWidthOperand";
> - let ParserMatchClass = bfi32_width_asmoperand;
> -}
> -
> -def bfi64_width_asmoperand : AsmOperandClass {
> - let Name = "BFI64Width";
> - let PredicateMethod = "isBitfieldWidth<64>";
> - let RenderMethod = "addBFIWidthOperands";
> - let DiagnosticType = "Width64";
> -}
> -
> -def bfi64_width : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
> - let PrintMethod = "printBFIWidthOperand";
> - let ParserMatchClass = bfi64_width_asmoperand;
> -}
> -
> -multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
> - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -
> - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
> - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - }
> -}
> -
> -defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
> -defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
> -
> -
> -def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
> - (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
> - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
> - (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
> - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]> {
> - // As above, no disassembler allowed.
> - let isAsmParserOnly = 1;
> - let Constraints = "$src = $Rd";
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Compare and branch (immediate)
> -//===----------------------------------------------------------------------===//
> -// Contains: CBZ, CBNZ
> -
> -class label_asmoperand<int width, int scale> : AsmOperandClass {
> - let Name = "Label" # width # "_" # scale;
> - let PredicateMethod = "isLabel<" # width # "," # scale # ">";
> - let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
> - let DiagnosticType = "Label";
> -}
> -
> -def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
> -
> -// All conditional immediate branches are the same really: 19 signed bits scaled
> -// by the instruction-size (4).
> -def bcc_target : Operand<OtherVT> {
> - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
> - let ParserMatchClass = label_wid19_scal4_asmoperand;
> - let PrintMethod = "printLabelOperand<19, 4>";
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
> - let isBranch = 1, isTerminator = 1 in {
> - def x : A64I_cmpbr<0b1, op,
> - (outs),
> - (ins GPR64:$Rt, bcc_target:$Label),
> - !strconcat(asmop,"\t$Rt, $Label"),
> - [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr, ReadBr]>;
> -
> - def w : A64I_cmpbr<0b0, op,
> - (outs),
> - (ins GPR32:$Rt, bcc_target:$Label),
> - !strconcat(asmop,"\t$Rt, $Label"),
> - [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr, ReadBr]>;
> - }
> -}
> -
> -defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
> - return Imm == A64CC::EQ;
> -}]> >;
> -defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
> - return Imm == A64CC::NE;
> -}]> >;
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional branch (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: B.cc
> -
> -def cond_code_asmoperand : AsmOperandClass {
> - let Name = "CondCode";
> - let DiagnosticType = "CondCode";
> -}
> -
> -def cond_code : Operand<i32>, ImmLeaf<i32, [{
> - return Imm >= 0 && Imm <= 15;
> -}]> {
> - let PrintMethod = "printCondCodeOperand";
> - let ParserMatchClass = cond_code_asmoperand;
> -}
> -
> -def Bcc : A64I_condbr<0b0, 0b0, (outs),
> - (ins cond_code:$Cond, bcc_target:$Label),
> - "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]> {
> - let Uses = [NZCV];
> - let isBranch = 1;
> - let isTerminator = 1;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional compare (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CCMN, CCMP
> -
> -def uimm4_asmoperand : AsmOperandClass {
> - let Name = "UImm4";
> - let PredicateMethod = "isUImm<4>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm4";
> -}
> -
> -def uimm4 : Operand<i32> {
> - let ParserMatchClass = uimm4_asmoperand;
> -}
> -
> -def uimm5 : Operand<i32> {
> - let ParserMatchClass = uimm5_asmoperand;
> -}
> -
> -// The only difference between this operand and the one for instructions like
> -// B.cc is that it's parsed manually. The others get parsed implicitly as part of
> -// the mnemonic handling.
> -def cond_code_op_asmoperand : AsmOperandClass {
> - let Name = "CondCodeOp";
> - let RenderMethod = "addCondCodeOperands";
> - let PredicateMethod = "isCondCode";
> - let ParserMethod = "ParseCondCodeOperand";
> - let DiagnosticType = "CondCode";
> -}
> -
> -def cond_code_op : Operand<i32> {
> - let PrintMethod = "printCondCodeOperand";
> - let ParserMatchClass = cond_code_op_asmoperand;
> -}
> -
> -class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
> - : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
> - (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
> - [], NoItinerary>,
> - Sched<[WriteCMP, ReadCMP]> {
> - let Defs = [NZCV];
> -}
> -
> -def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
> -def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
> -def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
> -def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional compare (register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CCMN, CCMP
> -
> -class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
> - : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
> - [], NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]> {
> - let Defs = [NZCV];
> -}
> -
> -def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
> -def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
> -def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
> -def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
> -
> -//===----------------------------------------------------------------------===//
> -// Conditional select instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
> -
> -// Condition code which is encoded as the inversion (semantically rather than
> -// bitwise) in the instruction.
> -def inv_cond_code_op_asmoperand : AsmOperandClass {
> - let Name = "InvCondCodeOp";
> - let RenderMethod = "addInvCondCodeOperands";
> - let PredicateMethod = "isCondCode";
> - let ParserMethod = "ParseCondCodeOperand";
> - let DiagnosticType = "CondCode";
> -}
> -
> -def inv_cond_code_op : Operand<i32> {
> - let ParserMatchClass = inv_cond_code_op_asmoperand;
> - let PrintMethod = "printInverseCondCodeOperand";
> -}
> -
> -// Having a separate operand for the selectable use-case is debatable, but gives
> -// consistency with cond_code.
> -def inv_cond_XFORM : SDNodeXForm<imm, [{
> - A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
> - return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
> -}]>;
> -
> -def inv_cond_code
> - : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
> -
> -
> -multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
> - SDPatternOperator select> {
> - let Uses = [NZCV] in {
> - def wwwc : A64I_condsel<0b0, op, 0b0, op2,
> - (outs GPR32:$Rd),
> - (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
> - [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> -
> -
> - def xxxc : A64I_condsel<0b1, op, 0b0, op2,
> - (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
> - [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteCMP, ReadCMP, ReadCMP]>;
> - }
> -}
> -
> -def simple_select
> - : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
> -
> -class complex_select<SDPatternOperator opnode>
> - : PatFrag<(ops node:$lhs, node:$rhs),
> - (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
> -
> -
> -defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
> -defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
> - complex_select<PatFrag<(ops node:$val),
> - (add node:$val, 1)>>>;
> -defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
> -defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
> -
> -// Now the instruction aliases, which fit nicely into LLVM's model:
> -
> -def : InstAlias<"cset $Rd, $Cond",
> - (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cset $Rd, $Cond",
> - (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"csetm $Rd, $Cond",
> - (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"csetm $Rd, $Cond",
> - (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinc $Rd, $Rn, $Cond",
> - (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinc $Rd, $Rn, $Cond",
> - (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinv $Rd, $Rn, $Cond",
> - (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cinv $Rd, $Rn, $Cond",
> - (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cneg $Rd, $Rn, $Cond",
> - (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
> -def : InstAlias<"cneg $Rd, $Rn, $Cond",
> - (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
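
For example (conditions illustrative), note the semantic inversion baked
into the encoding:

    cset w0, eq        // i.e. csinc w0, wzr, wzr, ne
    cneg x0, x1, lt    // i.e. csneg x0, x1, x1, ge
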
> -
> -// Finally some helper patterns.
> -
> -// For CSET (a.k.a. zero-extension of icmp)
> -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
> - (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
> - (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
> -
> -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
> - (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
> - (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
> -
> -// For CSETM (a.k.a. sign-extension of icmp)
> -def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
> - (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
> - (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
> -
> -def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
> - (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
> -def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
> - (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
> -
> -// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
> -// commutativity. The instructions are too complex for isCommutable to be used,
> -// so we have to create the patterns manually:
> -
> -// No commutable pattern for CSEL since the commuted version is isomorphic.
> -
> -// CSINC
> -def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
> - (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
> -def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
> - (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
> -
> -// CSINV
> -def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
> - (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
> -def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
> - (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
> -
> -// CSNEG
> -def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
> - (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
> -def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
> - (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Data Processing (1 source) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
> -
> -// We define a unary operator which always fails. We will use this to
> -// define unary operators that cannot be matched.
> -
> -class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
> - list<dag> patterns, RegisterClass GPRrc,
> - InstrItinClass itin>:
> - A64I_dp_1src<sf,
> - 0,
> - 0b00000,
> - opcode,
> - !strconcat(asmop, "\t$Rd, $Rn"),
> - (outs GPRrc:$Rd),
> - (ins GPRrc:$Rn),
> - patterns,
> - itin>,
> - Sched<[WriteALU, ReadALU]>;
> -
> -multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
> - let hasSideEffects = 0 in {
> - def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
> - def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
> - }
> -}
> -
> -defm RBIT : A64I_dp_1src<0b000000, "rbit">;
> -defm CLS : A64I_dp_1src<0b000101, "cls">;
> -defm CLZ : A64I_dp_1src<0b000100, "clz">;
> -
> -def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
> -def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
> -def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
> -def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
> -
> -def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
> -def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
> -def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
> -def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
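
i.e. cttz is synthesized as a bit-reverse followed by count-leading-zeros:

    rbit w0, w0
    clz  w0, w0
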
> -
> -
> -def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
> - [(set i32:$Rd, (bswap i32:$Rn))],
> - GPR32, NoItinerary>;
> -def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
> - [(set i64:$Rd, (bswap i64:$Rn))],
> - GPR64, NoItinerary>;
> -def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
> - [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
> - GPR64, NoItinerary>;
> -def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
> - [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
> - GPR32,
> - NoItinerary>;
> -def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
> -
> -//===----------------------------------------------------------------------===//
> -// Data Processing (2 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
> -// LSR, ASR, ROR
> -
> -
> -class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
> - RegisterClass GPRsp,
> - InstrItinClass itin>:
> - A64I_dp_2src<sf,
> - opcode,
> - 0,
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
> - (outs GPRsp:$Rd),
> - (ins GPRsp:$Rn, GPRsp:$Rm),
> - patterns,
> - itin>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -multiclass dp_2src_crc<bit c, string asmop> {
> - def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
> - !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
> - def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
> - !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
> - def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
> - !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
> - def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
> - !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
> - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -}
> -
> -multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
> - def www : dp_2src_impl<0b0,
> - opcode,
> - asmop,
> - [(set i32:$Rd,
> - (op i32:$Rn, (i64 (zext i32:$Rm))))],
> - GPR32,
> - NoItinerary>;
> - def xxx : dp_2src_impl<0b1,
> - opcode,
> - asmop,
> - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
> - GPR64,
> - NoItinerary>;
> -}
> -
> -
> -multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
> - def www : dp_2src_impl<0b0,
> - opcode,
> - asmop,
> - [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
> - GPR32,
> - NoItinerary>;
> - def xxx : dp_2src_impl<0b1,
> - opcode,
> - asmop,
> - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
> - GPR64,
> - NoItinerary>;
> -}
> -
> -// Here we define the data processing 2 source instructions.
> -defm CRC32 : dp_2src_crc<0b0, "crc32">;
> -defm CRC32C : dp_2src_crc<0b1, "crc32c">;
> -
> -let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in {
> - defm UDIV : dp_2src<0b000010, "udiv", udiv>;
> - defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
> -}
> -
> -let SchedRW = [WriteALUs, ReadALU, ReadALU] in {
> - defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
> - defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
> - defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
> - defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
> -}
> -
> -// Extra patterns for an incoming 64-bit value for a 32-bit
> -// operation. Since the LLVM operations are undefined (as in C) if the
> -// RHS is out of range, it's perfectly permissible to discard the high
> -// bits of the GPR64.
> -def : Pat<(shl i32:$Rn, i64:$Rm),
> - (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
> -def : Pat<(srl i32:$Rn, i64:$Rm),
> - (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
> -def : Pat<(sra i32:$Rn, i64:$Rm),
> - (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
> -def : Pat<(rotr i32:$Rn, i64:$Rm),
> - (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
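
e.g. an i32 shift by an i64 amount should come out as just (registers
illustrative):

    lsr w0, w0, w1    // w1 is the low half of the amount; the high bits never mattered
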
> -
> -// Here we define the aliases for the data processing 2 source instructions.
> -def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
> -def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
> -def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
> -def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
> -
> -//===----------------------------------------------------------------------===//
> -// Data Processing (3 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
> -// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
> -
> -class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
> - ValueType AccTy, RegisterClass SrcReg,
> - string asmop, dag pattern>
> - : A64I_dp3<sf, opcode,
> - (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
> - [(set AccTy:$Rd, pattern)], NoItinerary>,
> - Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> {
> - bits<5> Ra;
> - let Inst{14-10} = Ra;
> -
> - RegisterClass AccGPR = AccReg;
> - RegisterClass SrcGPR = SrcReg;
> -}
> -
> -def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
> - (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
> -def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
> - (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
> -
> -def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
> - (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
> -def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
> - (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
> -
> -def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
> - (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
> -def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
> - (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
> -
> -def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
> - (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
> -def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
> - (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
> -
> -let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
> - def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm),
> - "umulh\t$Rd, $Rn, $Rm",
> - [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteMAC, ReadMAC, ReadMAC]>;
> -
> - def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm),
> - "smulh\t$Rd, $Rn, $Rm",
> - [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteMAC, ReadMAC, ReadMAC]>;
> -}
> -
> -multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
> - Register ZR, dag pattern> {
> - def : InstAlias<asmop # " $Rd, $Rn, $Rm",
> - (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
> -
> - def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
> -}
> -
> -defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
> -defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
> -
> -defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
> - (sub 0, (mul i32:$Rn, i32:$Rm))>;
> -defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
> - (sub 0, (mul i64:$Rn, i64:$Rm))>;
> -
> -defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
> - (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
> -defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
> - (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
> -
> -defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
> - (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
> -defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
> - (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
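To make the alias scheme above concrete: mul/mneg/smull/smnegl/umull/umnegl
are just the four-operand instructions with the zero register as the
accumulator. A rough C rendering (a sketch, not the backend's mechanism):

  #include <stdint.h>

  static uint32_t madd32(uint32_t rn, uint32_t rm, uint32_t ra) {
    return ra + rn * rm;                 /* madd w0, w1, w2, w3 */
  }
  static uint32_t mul32(uint32_t rn, uint32_t rm) {
    return madd32(rn, rm, 0);            /* mul w0,w1,w2 == madd w0,w1,w2,wzr */
  }
  static int64_t smull64(int32_t rn, int32_t rm) {
    return (int64_t)rn * (int64_t)rm;    /* smull x0,w1,w2 == smaddl ...,xzr */
  }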
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Exception generation
> -//===----------------------------------------------------------------------===//
> -// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
> -
> -def uimm16_asmoperand : AsmOperandClass {
> - let Name = "UImm16";
> - let PredicateMethod = "isUImm<16>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm16";
> -}
> -
> -def uimm16 : Operand<i32> {
> - let ParserMatchClass = uimm16_asmoperand;
> -}
> -
> -class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
> - : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
> - !strconcat(asmop, "\t$UImm16"), [], NoItinerary>,
> - Sched<[WriteBr]> {
> - let isBranch = 1;
> - let isTerminator = 1;
> -}
> -
> -def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
> -def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
> -def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
> -def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
> -def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
> -
> -def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
> -def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
> -def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
> -
> -// The immediate is optional for the DCPS instructions, defaulting to 0.
> -def : InstAlias<"dcps1", (DCPS1i 0)>;
> -def : InstAlias<"dcps2", (DCPS2i 0)>;
> -def : InstAlias<"dcps3", (DCPS3i 0)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Extract (immediate)
> -//===----------------------------------------------------------------------===//
> -// Contains: EXTR + alias ROR
> -
> -def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
> - (outs GPR32:$Rd),
> - (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
> - "extr\t$Rd, $Rn, $Rm, $LSB",
> - [(set i32:$Rd,
> - (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
> - (outs GPR64:$Rd),
> - (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
> - "extr\t$Rd, $Rn, $Rm, $LSB",
> - [(set i64:$Rd,
> - (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -def : InstAlias<"ror $Rd, $Rs, $LSB",
> - (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
> -def : InstAlias<"ror $Rd, $Rs, $LSB",
> - (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
> -
> -def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
> - (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
> -def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
> - (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
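The ROR alias and the rotr patterns are the same trick: a rotate is an EXTR
whose two source registers coincide. In C terms (sketch; lsb restricted to
1..31 here so both shifts stay defined):

  #include <stdint.h>

  static uint32_t extr32(uint32_t hi, uint32_t lo, unsigned lsb) {
    return (lo >> lsb) | (hi << (32 - lsb));
  }
  static uint32_t rotr32(uint32_t x, unsigned n) {
    return extr32(x, x, n);              /* ror w0,w1,#n == extr w0,w1,w1,#n */
  }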
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point compare instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCMP, FCMPE
> -
> -def fpzero_asmoperand : AsmOperandClass {
> - let Name = "FPZero";
> - let ParserMethod = "ParseFPImmOperand";
> - let DiagnosticType = "FPZero";
> -}
> -
> -def fpz32 : Operand<f32>,
> - ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
> - let ParserMatchClass = fpzero_asmoperand;
> - let PrintMethod = "printFPZeroOperand";
> - let DecoderMethod = "DecodeFPZeroOperand";
> -}
> -
> -def fpz64 : Operand<f64>,
> - ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
> - let ParserMatchClass = fpzero_asmoperand;
> - let PrintMethod = "printFPZeroOperand";
> - let DecoderMethod = "DecodeFPZeroOperand";
> -}
> -
> -def fpz64movi : Operand<i64>,
> - ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
> - let ParserMatchClass = fpzero_asmoperand;
> - let PrintMethod = "printFPZeroOperand";
> - let DecoderMethod = "DecodeFPZeroOperand";
> -}
> -
> -multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
> - def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
> - (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
> - let Defs = [NZCV];
> - }
> -
> - def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
> - (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
> - let Defs = [NZCV];
> - }
> -}
> -
> -defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
> - (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
> -defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
> - (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
> -
> -// In the immediate forms, what would be Rm must be written as #0.0; note
> -// that even though it's called "$Rm" here to fit in with the InstrFormats,
> -// it's actually an immediate.
> -defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
> - (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
> -
> -defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
> - (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point conditional compare instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCCMP, FCCMPE
> -
> -class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
> - : A64I_fpccmp<0b0, 0b0, type, op,
> - (outs),
> - (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
> - !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
> - [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
> - let Defs = [NZCV];
> -}
> -
> -def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
> -def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
> -def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
> -def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point conditional select instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCSEL
> -
> -let Uses = [NZCV] in {
> - def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
> - (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
> - "fcsel\t$Rd, $Rn, $Rm, $Cond",
> - [(set f32:$Rd,
> - (simple_select f32:$Rn, f32:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -
> -
> - def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
> - (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
> - "fcsel\t$Rd, $Rn, $Rm, $Cond",
> - [(set f64:$Rd,
> - (simple_select f64:$Rn, f64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point data-processing (1 source)
> -//===----------------------------------------------------------------------===//
> -// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
> -
> -def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
> - [{ (void)N; return false; }]>;
> -
> -// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
> -// syntax. Default to no pattern because most are odd enough not to have one.
> -multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
> - SDPatternOperator opnode = FPNoUnop> {
> - def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
> - !strconcat(asmstr, "\t$Rd, $Rn"),
> - [(set f32:$Rd, (opnode f32:$Rn))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> - def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
> - !strconcat(asmstr, "\t$Rd, $Rn"),
> - [(set f64:$Rd, (opnode f64:$Rn))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -}
> -
> -defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
> -defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
> -defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
> -let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
> - defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
> -}
> -
> -defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
> -defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
> -defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
> -defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
> -defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
> -defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
> -defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
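Reading the mapping off the defm lines above: frintp/frintm/frintz/frintx/
frinti correspond to the ceil/floor/trunc/rint/nearbyint rounding flavours
(frintn and frinta get no pattern here). A C-level sketch of those flavours:

  #include <math.h>

  static double frint_flavours(double x) {
    return ceil(x)        /* frintp: towards +inf */
         + floor(x)       /* frintm: towards -inf */
         + trunc(x)       /* frintz: towards zero */
         + rint(x)        /* frintx: current mode, may raise inexact */
         + nearbyint(x);  /* frinti: current mode, no inexact */
  }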
> -
> -// The FCVT instructions have different source and destination register-types,
> -// but the fields are uniform everywhere a D-register (say) crops up. Package
> -// this information in a Record.
> -class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
> - RegisterClass Class = rc;
> - ValueType VT = vt;
> - bit t1 = fld{1};
> - bit t0 = fld{0};
> -}
> -
> -def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
> -def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
> -def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
> -
> -class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
> - : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
> - {0,0,0,1, DestReg.t1, DestReg.t0},
> - (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
> - "fcvt\t$Rd, $Rn",
> - [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
> -def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
> -def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
> -def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
> -def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
> -def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point data-processing (2 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
> -
> -def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
> - [{ (void)N; return false; }]>;
> -
> -multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
> - SDPatternOperator opnode> {
> - def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
> - (outs FPR32:$Rd),
> - (ins FPR32:$Rn, FPR32:$Rm),
> - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
> - [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -
> - def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
> - (outs FPR64:$Rd),
> - (ins FPR64:$Rn, FPR64:$Rm),
> - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
> - [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
> -}
> -
> -let isCommutable = 1 in {
> - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
> - defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
> - }
> - defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
> -
> - // No patterns for these.
> - defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
> - defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
> - defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
> - defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
> -
> - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
> - defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (fneg (fmul node:$lhs, node:$rhs))> >;
> - }
> -}
> -
> -let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in {
> - defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
> -}
> -defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point data-processing (3 sources) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FMADD, FMSUB, FNMADD, FNMSUB
> -
> -def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
> - (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
> -def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
> - (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
> -def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
> - (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
> -
> -class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
> - bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
> - : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
> - (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
> - !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
> - [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
> - NoItinerary>,
> - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>;
> -
> -def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
> -def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
> -def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
> -def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
> -
> -def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
> -def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
> -def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
> -def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
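The three PatFrags above just shuffle negations around a fused multiply-add;
in terms of C99 fma() (a single rounding step, which is the point):

  #include <math.h>

  static double fmadd_c (double n, double m, double a) { return fma( n, m,  a); }
  static double fmsub_c (double n, double m, double a) { return fma(-n, m,  a); }
  static double fnmadd_c(double n, double m, double a) { return fma(-n, m, -a); }
  static double fnmsub_c(double n, double m, double a) { return fma( n, m, -a); }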
> -
> -// Extra patterns for when we're allowed to optimise separate multiplication and
> -// addition.
> -let Predicates = [HasFPARMv8, UseFusedMAC] in {
> -def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
> - (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
> - (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
> - (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -def : Pat<(f32 (fsub (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
> - (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
> -
> -def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
> - (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
> - (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
> - (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -def : Pat<(f64 (fsub (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
> - (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
> -}
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point <-> fixed-point conversion instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
> -
> -// #1-#32 allowed, encoded as "64 - <specified imm>".
> -def fixedpos_asmoperand_i32 : AsmOperandClass {
> - let Name = "CVTFixedPos32";
> - let RenderMethod = "addCVTFixedPosOperands";
> - let PredicateMethod = "isCVTFixedPos<32>";
> - let DiagnosticType = "CVTFixedPos32";
> -}
> -
> -// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
> -def fixedpos_asmoperand_i64 : AsmOperandClass {
> - let Name = "CVTFixedPos64";
> - let RenderMethod = "addCVTFixedPosOperands";
> - let PredicateMethod = "isCVTFixedPos<64>";
> - let DiagnosticType = "CVTFixedPos64";
> -}
> -
> -// We need the cartesian product of the f32/f64 and i32/i64 operands for
> -// conversions:
> -// + Selection needs to use operands of the correct floating type
> -// + Assembly parsing and decoding depend on integer width
> -class cvtfix_i32_op<ValueType FloatVT>
> - : Operand<FloatVT>,
> - ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
> - let ParserMatchClass = fixedpos_asmoperand_i32;
> - let DecoderMethod = "DecodeCVT32FixedPosOperand";
> - let PrintMethod = "printCVTFixedPosOperand";
> -}
> -
> -class cvtfix_i64_op<ValueType FloatVT>
> - : Operand<FloatVT>,
> - ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
> - let ParserMatchClass = fixedpos_asmoperand_i64;
> - let PrintMethod = "printCVTFixedPosOperand";
> -}
> -
> -// Because of the proliferation of weird operands, it's not really
> -// worth going for a multiclass here. Oh well.
> -
> -class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
> - RegisterClass GPR, RegisterClass FPR,
> - ValueType DstTy, ValueType SrcTy,
> - Operand scale_op, string asmop, SDNode cvtop>
> - : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
> - (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
> - !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
> - [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
> - cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
> -def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
> - cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
> -def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
> - cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
> -def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
> - cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
> -
> -def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
> - cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
> -def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
> - cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
> -def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
> - cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
> -def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
> - cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
> -
> -
> -class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
> - RegisterClass FPR, RegisterClass GPR,
> - ValueType DstTy, ValueType SrcTy,
> - Operand scale_op, string asmop, SDNode cvtop>
> - : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
> - (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
> - !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
> - [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
> - NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
> - cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
> -def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
> - cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
> -def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
> - cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
> -def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
> - cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
> -def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
> - cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
> -def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
> - cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
> -def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
> - cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
> -def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
> - cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
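Reading the patterns above: fcvtzs with scale #s selects from
fp_to_sint(x * 2^s), and scvtf with #s gives sint_to_fp(v) / 2^s. A C sketch
of that semantics (s kept below 32 so the shift is defined):

  #include <stdint.h>

  static int32_t fcvtzs_fix(float x, unsigned s) {
    return (int32_t)(x * (float)(1u << s));
  }
  static float scvtf_fix(int32_t v, unsigned s) {
    return (float)v / (float)(1u << s);
  }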
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point <-> integer conversion instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
> -
> -class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
> - RegisterClass DestPR, RegisterClass SrcPR, string asmop>
> - : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
> - !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> -multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
> - def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
> - GPR32, FPR32, asmop # "s">;
> - def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
> - GPR64, FPR32, asmop # "s">;
> - def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
> - GPR32, FPR32, asmop # "u">;
> - def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
> - GPR64, FPR32, asmop # "u">;
> -
> - def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
> - GPR32, FPR64, asmop # "s">;
> - def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
> - GPR64, FPR64, asmop # "s">;
> - def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
> - GPR32, FPR64, asmop # "u">;
> - def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
> - GPR64, FPR64, asmop # "u">;
> -}
> -
> -defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
> -defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
> -defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
> -defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
> -defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
> -
> -let Predicates = [HasFPARMv8] in {
> -def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
> -def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
> -def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
> -def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
> -def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
> -def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
> -def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
> -def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
> -}
> -
> -multiclass A64I_inttofp<bit o0, string asmop> {
> - def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
> - def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
> - def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
> - def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
> -}
> -
> -defm S : A64I_inttofp<0b0, "scvtf">;
> -defm U : A64I_inttofp<0b1, "ucvtf">;
> -
> -let Predicates = [HasFPARMv8] in {
> -def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
> -def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
> -def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
> -def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
> -def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
> -def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
> -def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
> -def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
> -}
> -
> -def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
> -def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
> -def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
> -def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
> -
> -let Predicates = [HasFPARMv8] in {
> -def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
> -def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
> -def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
> -def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
> -}
> -
> -def lane1_asmoperand : AsmOperandClass {
> - let Name = "Lane1";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "Lane1";
> -}
> -
> -def lane1 : Operand<i32> {
> - let ParserMatchClass = lane1_asmoperand;
> - let PrintMethod = "printBareImmOperand";
> -}
> -
> -let DecoderMethod = "DecodeFMOVLaneInstruction" in {
> - def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
> - (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
> - "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -
> - def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
> - (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
> - "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>,
> - Sched<[WriteFPALU, ReadFPALU]>;
> -}
> -
> -let Predicates = [HasFPARMv8] in {
> -def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
> - (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
> -
> -def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
> - (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Floating-point immediate instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: FMOV
> -
> -def fpimm_asmoperand : AsmOperandClass {
> - let Name = "FMOVImm";
> - let ParserMethod = "ParseFPImmOperand";
> - let DiagnosticType = "FPImm";
> -}
> -
> -// The MCOperand for these instructions is the encoded 8-bit value.
> -def SDXF_fpimm : SDNodeXForm<fpimm, [{
> - uint32_t Imm8;
> - A64Imms::isFPImm(N->getValueAPF(), Imm8);
> - return CurDAG->getTargetConstant(Imm8, MVT::i32);
> -}]>;
> -
> -class fmov_operand<ValueType FT>
> - : Operand<i32>,
> - PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
> - SDXF_fpimm> {
> - let PrintMethod = "printFPImmOperand";
> - let ParserMatchClass = fpimm_asmoperand;
> -}
> -
> -def fmov32_operand : fmov_operand<f32>;
> -def fmov64_operand : fmov_operand<f64>;
> -
> -class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
> - Operand fmov_operand>
> - : A64I_fpimm<0b0, 0b0, type, 0b00000,
> - (outs Reg:$Rd),
> - (ins fmov_operand:$Imm8),
> - "fmov\t$Rd, $Imm8",
> - [(set VT:$Rd, fmov_operand:$Imm8)],
> - NoItinerary>,
> - Sched<[WriteFPALU]>;
> -
> -def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
> -def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
> -
> -//===----------------------------------------------------------------------===//
> -// Load-register (literal) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDR, LDRSW, PRFM
> -
> -def ldrlit_label_asmoperand : AsmOperandClass {
> - let Name = "LoadLitLabel";
> - let RenderMethod = "addLabelOperands<19, 4>";
> - let DiagnosticType = "Label";
> -}
> -
> -def ldrlit_label : Operand<i64> {
> - let EncoderMethod = "getLoadLitLabelOpValue";
> -
> - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<19, 4>";
> - let ParserMatchClass = ldrlit_label_asmoperand;
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -// Various instructions take an immediate value (a raw number is always
> -// accepted), but some values also have a symbolic name to make things easier.
> -// These operands and the associated functions abstract away the differences.
> -multiclass namedimm<string prefix, string mapper> {
> - def _asmoperand : AsmOperandClass {
> - let Name = "NamedImm" # prefix;
> - let PredicateMethod = "isUImm";
> - let RenderMethod = "addImmOperands";
> - let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
> - let DiagnosticType = "NamedImm_" # prefix;
> - }
> -
> - def _op : Operand<i32> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
> - let PrintMethod = "printNamedImmOperand<" # mapper # ">";
> - let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
> - }
> -}
> -
> -defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
> -
> -class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
> - list<dag> patterns = []>
> - : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
> - "ldr\t$Rt, $Imm19", patterns, NoItinerary>,
> - Sched<[WriteLd]>;
> -
> -let mayLoad = 1 in {
> - def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
> - def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
> -}
> -
> -let Predicates = [HasFPARMv8] in {
> -def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
> -def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
> -}
> -
> -let mayLoad = 1 in {
> - let Predicates = [HasFPARMv8] in {
> - def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
> - }
> -
> - def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
> - (outs GPR64:$Rt),
> - (ins ldrlit_label:$Imm19),
> - "ldrsw\t$Rt, $Imm19",
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def PRFM_lit : A64I_LDRlit<0b11, 0b0,
> - (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
> - "prfm\t$Rt, $Imm19",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store exclusive instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
> -// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
> -// STLRH, STLR, LDARB, LDARH, LDAR
> -
> -// Since these instructions have the undefined register bits set to 1 in
> -// their canonical form, we need a post encoder method to set those bits
> -// to 1 when encoding these instructions. We do this using the
> -// fixLoadStoreExclusive function. This function has template parameters:
> -//
> -// fixLoadStoreExclusive<int hasRs, int hasRt2>
> -//
> -// hasRs indicates that the instruction uses the Rs field, so we won't set
> -// it to 1 (and the same for Rt2). We don't need template parameters for
> -// the other register fields since Rt and Rn are always used.
> -
> -// This operand parses a GPR64xsp register, followed by an optional immediate
> -// #0.
> -def GPR64xsp0_asmoperand : AsmOperandClass {
> - let Name = "GPR64xsp0";
> - let PredicateMethod = "isWrappedReg";
> - let RenderMethod = "addRegOperands";
> - let ParserMethod = "ParseLSXAddressOperand";
> - // Diagnostics are provided by ParserMethod
> -}
> -
> -def GPR64xsp0 : RegisterOperand<GPR64xsp> {
> - let ParserMatchClass = GPR64xsp0_asmoperand;
> -}
> -
> -//===----------------------------------
> -// Store-exclusive (releasing & normal)
> -//===----------------------------------
> -
> -class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_stn <size,
> - opcode{2}, 0, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
> - pat, itin> {
> - let mayStore = 1;
> - let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
> - let Constraints = "@earlyclobber $Rs";
> -}
> -
> -multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
> - def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
> - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
> - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [],NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
> - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
> - (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -}
> -
> -defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
> -defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
> -
> -//===----------------------------------
> -// Loads
> -//===----------------------------------
> -
> -class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_tn <size,
> - opcode{2}, 1, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rt, [$Rn]"),
> - pat, itin> {
> - let mayLoad = 1;
> - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
> -}
> -
> -multiclass A64I_LRex<string asmstr, bits<3> opcode> {
> - def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
> - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
> - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
> - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -
> - def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
> - (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd]>;
> -}
> -
> -defm LDXR : A64I_LRex<"ldxr", 0b000>;
> -defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
> -defm LDAR : A64I_LRex<"ldar", 0b101>;
> -
> -class acquiring_load<PatFrag base>
> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{
> - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
> - return Ordering == Acquire || Ordering == SequentiallyConsistent;
> -}]>;
> -
> -def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
> -def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
> -def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
> -def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
> -
> -def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>;
> -def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
> -def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>;
> -def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
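The acquiring_load fragment matches exactly the acquire and seq_cst atomic
loads, i.e. the C11 loads one would expect to end up as LDAR (sketch):

  #include <stdatomic.h>

  static int load_acquire(_Atomic int *p) {
    return atomic_load_explicit(p, memory_order_acquire);
  }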
> -
> -//===----------------------------------
> -// Store-release (no exclusivity)
> -//===----------------------------------
> -
> -class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_tn <size,
> - opcode{2}, 0, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rt, [$Rn]"),
> - pat, itin> {
> - let mayStore = 1;
> - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
> -}
> -
> -class releasing_store<PatFrag base>
> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
> - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
> - return Ordering == Release || Ordering == SequentiallyConsistent;
> -}]>;
> -
> -def atomic_store_release_8 : releasing_store<atomic_store_8>;
> -def atomic_store_release_16 : releasing_store<atomic_store_16>;
> -def atomic_store_release_32 : releasing_store<atomic_store_32>;
> -def atomic_store_release_64 : releasing_store<atomic_store_64>;
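And the mirror image for stores: release and seq_cst atomic stores match
releasing_store and feed the STLR patterns below (sketch):

  #include <stdatomic.h>

  static void store_release(_Atomic int *p, int v) {
    atomic_store_explicit(p, v, memory_order_release);
  }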
> -
> -multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
> - def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
> - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
> - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
> - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -
> - def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
> - (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
> - [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
> - NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]>;
> -}
> -
> -defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
> -
> -//===----------------------------------
> -// Store-exclusive pair (releasing & normal)
> -//===----------------------------------
> -
> -class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_stt2n <size,
> - opcode{2}, 0, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
> - pat, itin> {
> - let mayStore = 1;
> -}
> -
> -
> -multiclass A64I_SPex<string asmstr, bits<3> opcode> {
> - def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
> - (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
> - GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> -
> - def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
> - (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
> - GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> -}
> -
> -defm STXP : A64I_SPex<"stxp", 0b010>;
> -defm STLXP : A64I_SPex<"stlxp", 0b011>;
> -
> -//===----------------------------------
> -// Load-exclusive pair (acquiring & normal)
> -//===----------------------------------
> -
> -class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
> - dag ins, list<dag> pat,
> - InstrItinClass itin> :
> - A64I_LDSTex_tt2n <size,
> - opcode{2}, 1, opcode{1}, opcode{0},
> - outs, ins,
> - !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
> - pat, itin>{
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
> - let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
> -}
> -
> -multiclass A64I_LPex<string asmstr, bits<3> opcode> {
> - def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
> - (outs GPR32:$Rt, GPR32:$Rt2),
> - (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]>;
> -
> - def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
> - (outs GPR64:$Rt, GPR64:$Rt2),
> - (ins GPR64xsp0:$Rn),
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]>;
> -}
> -
> -defm LDXP : A64I_LPex<"ldxp", 0b010>;
> -defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store register (unscaled immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and PRFUM
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (register offset) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (unsigned immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (immediate post-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register (immediate pre-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
> -
> -// Note that patterns are much later on in a completely separate section (they
> -// need ADRPxi to be defined).
> -
> -//===-------------------------------
> -// 1. Various operands needed
> -//===-------------------------------
> -
> -//===-------------------------------
> -// 1.1 Unsigned 12-bit immediate operands
> -//===-------------------------------
> -// The addressing mode for these instructions consists of an unsigned 12-bit
> -// immediate which is scaled by the size of the memory access.
> -//
> -// We represent this in the MC layer by two operands:
> -// 1. A base register.
> -// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
> -// would have '1' in this field.
> -// This means that separate functions are needed for converting representations
> -// which *are* aware of the intended access size.
> -
> -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
> -// know the access size via some means. An isolated operand does not have this
> -// information unless told from here, which means we need separate tablegen
> -// Operands for each access size. This multiclass takes care of instantiating
> -// the correct template functions in the rest of the backend.
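The byte-offset/field relationship described above, as a small C sketch
(encode_uimm12 is illustrative, not a function in the backend):

  #include <stdbool.h>
  #include <stdint.h>

  static bool encode_uimm12(uint64_t byte_off, unsigned mem_size,
                            uint32_t *field) {
    if (byte_off % mem_size != 0)
      return false;                  /* must be a multiple of the access size */
    uint64_t scaled = byte_off / mem_size;  /* LDR x0,[x0,#8] -> field == 1 */
    if (scaled > 0xfff)
      return false;                  /* 12-bit field */
    *field = (uint32_t)scaled;
    return true;
  }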
> -
> -multiclass offsets_uimm12<int MemSize, string prefix> {
> - def uimm12_asmoperand : AsmOperandClass {
> - let Name = "OffsetUImm12_" # MemSize;
> - let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
> - let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
> - let DiagnosticType = "LoadStoreUImm12_" # MemSize;
> - }
> -
> - // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
> - // complicates things beyond TableGen's ken.
> - def uimm12 : Operand<i64>,
> - ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
> -
> - let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
> - let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
> - }
> -}
> -
> -defm byte_ : offsets_uimm12<1, "byte_">;
> -defm hword_ : offsets_uimm12<2, "hword_">;
> -defm word_ : offsets_uimm12<4, "word_">;
> -defm dword_ : offsets_uimm12<8, "dword_">;
> -defm qword_ : offsets_uimm12<16, "qword_">;
> -
> -//===-------------------------------
> -// 1.2 Signed 9-bit immediate operands
> -//===-------------------------------
> -
> -// The MCInst is expected to store the bit-wise encoding of the value,
> -// which amounts to lopping off the extended sign bits.
> -def SDXF_simm9 : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
> -}]>;
> -
> -def simm9_asmoperand : AsmOperandClass {
> - let Name = "SImm9";
> - let PredicateMethod = "isSImm<9>";
> - let RenderMethod = "addSImmOperands<9>";
> - let DiagnosticType = "LoadStoreSImm9";
> -}
> -
> -def simm9 : Operand<i64>,
> - ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
> - SDXF_simm9> {
> - let PrintMethod = "printOffsetSImm9Operand";
> - let ParserMatchClass = simm9_asmoperand;
> -}
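So simm9 accepts -256..255, and per SDXF_simm9 above the encoding is
literally the low nine bits (sketch):

  #include <stdint.h>

  static uint32_t encode_simm9(int64_t imm) {  /* caller checks -256..255 */
    return (uint32_t)(imm & 0x1ff);
  }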
> -
> -
> -//===-------------------------------
> -// 1.3 Register offset extensions
> -//===-------------------------------
> -
> -// The assembly-syntax for these addressing-modes is:
> -// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
> -//
> -// The essential semantics are:
> -// + <amount> is a shift: #<log(transfer size)> or #0
> -// + <R> can be W or X.
> -// + If <R> is W, <extend> can be UXTW or SXTW
> -// + If <R> is X, <extend> can be LSL or SXTX
> -//
> -// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
> -// which will need separate instructions for LLVM type-consistency. We'll also
> -// need separate operands, of course.
> -multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
> - string Rm, string prefix> {
> - def regext_asmoperand : AsmOperandClass {
> - let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
> - let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
> - let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
> - let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
> - }
> -
> - def regext : Operand<i64> {
> - let PrintMethod
> - = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
> -
> - let DecoderMethod = "DecodeAddrRegExtendOperand";
> - let ParserMatchClass
> - = !cast<AsmOperandClass>(prefix # regext_asmoperand);
> - }
> -}
> -
> -multiclass regexts_wx<int MemSize, string prefix> {
> - // Rm is an X-register if LSL or SXTX are specified as the shift.
> - defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
> -
> - // Rm is a W-register if UXTW or SXTW are specified as the shift.
> - defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
> -}
> -
> -defm byte_ : regexts_wx<1, "byte_">;
> -defm hword_ : regexts_wx<2, "hword_">;
> -defm word_ : regexts_wx<4, "word_">;
> -defm dword_ : regexts_wx<8, "dword_">;
> -defm qword_ : regexts_wx<16, "qword_">;
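For reference, the effective-address computation behind the syntax described
above, in C (a sketch of the assumed semantics; s is #0 or #log2(transfer
size)):

  #include <stdint.h>

  static uint64_t ea_sxtw(uint64_t xn, int32_t wm, unsigned s) {
    return xn + ((uint64_t)(int64_t)wm << s);   /* [Xn, Wm, SXTW #s] */
  }
  static uint64_t ea_lsl(uint64_t xn, uint64_t xm, unsigned s) {
    return xn + (xm << s);                      /* [Xn, Xm, LSL #s] */
  }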
> -
> -
> -//===------------------------------
> -// 2. The instructions themselves.
> -//===------------------------------
> -
> -// We have the following instructions to implement:
> -// | | B | H | W | X |
> -// |-----------------+-------+-------+-------+--------|
> -// | unsigned str | STRB | STRH | STR | STR |
> -// | unsigned ldr | LDRB | LDRH | LDR | LDR |
> -// | signed ldr to W | LDRSB | LDRSH | - | - |
> -// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
> -
> -// This will instantiate the LDR/STR instructions you'd expect to use for an
> -// unsigned datatype (first two rows above) or floating-point register, which is
> -// reasonably uniform across all access sizes.
> -
> -
> -//===------------------------------
> -// 2.1 Regular instructions
> -//===------------------------------
> -
> -// This class covers the basic unsigned or irrelevantly-signed loads and stores,
> -// to general-purpose and floating-point registers.
> -
> -class AddrParams<string prefix> {
> - Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
> -
> - Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
> - Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
> -}
> -
> -def byte_addrparams : AddrParams<"byte">;
> -def hword_addrparams : AddrParams<"hword">;
> -def word_addrparams : AddrParams<"word">;
> -def dword_addrparams : AddrParams<"dword">;
> -def qword_addrparams : AddrParams<"qword">;
> -
> -multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
> - bit high_opc, string asmsuffix,
> - RegisterClass GPR, AddrParams params> {
> - // Unsigned immediate
> - def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - }
> - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - // Register offset (four of these: load/store and Wm/Xm).
> - let mayLoad = 1 in {
> - def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
> - (outs GPR:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
> - (outs GPR:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> - }
> - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> - let mayStore = 1 in {
> - def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
> - params.regextWm:$Ext),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> -
> - def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
> - params.regextXm:$Ext),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
> - }
> - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> - // Unaligned immediate
> - def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - }
> - def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - // Post-indexed
> - def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
> - (outs GPR64xsp:$Rn_wb),
> - (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let mayStore = 1;
> -
> - // Decoder only needed for unpredictability checking (FIXME).
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - // Pre-indexed
> - def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
> - (outs GPR64xsp:$Rn_wb),
> - (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let mayStore = 1;
> -
> - // Decoder only needed for unpredictability checking (FIXME).
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
> - (outs GPR:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> -}
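The writeback constraint "$Rn = $Rn_wb" in the indexed forms above encodes
the usual pre/post-index semantics; modelling the base register as *rn in C:

  #include <stdint.h>

  static uint32_t ldr_post(uint32_t **rn, int64_t imm) { /* ldr wt, [xn], #imm */
    uint32_t v = **rn;
    *rn = (uint32_t *)((char *)*rn + imm);
    return v;
  }
  static uint32_t ldr_pre(uint32_t **rn, int64_t imm) {  /* ldr wt, [xn, #imm]! */
    *rn = (uint32_t *)((char *)*rn + imm);
    return **rn;
  }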
> -
> -// STRB/LDRB: First define the instructions
> -defm LS8
> - : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
> -
> -// STRH/LDRH
> -defm LS16
> - : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
> -
> -
> -// STR/LDR to/from a W register
> -defm LS32
> - : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
> -
> -// STR/LDR to/from an X register
> -defm LS64
> - : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
> -
> -let Predicates = [HasFPARMv8] in {
> -// STR/LDR to/from a B register
> -defm LSFP8
> - : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
> -
> -// STR/LDR to/from an H register
> -defm LSFP16
> - : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
> -
> -// STR/LDR to/from an S register
> -defm LSFP32
> - : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
> -// STR/LDR to/from a D register
> -defm LSFP64
> - : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
> -// STR/LDR to/from a Q register
> -defm LSFP128
> - : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
> - qword_addrparams>;
> -}
> -
> -//===------------------------------
> -// 2.3 Signed loads
> -//===------------------------------
> -
> -// Byte and half-word signed loads can both go into either an X or a W register,
> -// so it's worth factoring out. Signed word loads don't fit because there is no
> -// W version.
> -multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
> - string prefix> {
> - // Unsigned offset
> - def w : A64I_LSunsigimm<size, 0b0, 0b11,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def x : A64I_LSunsigimm<size, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - // Register offset
> - let mayLoad = 1 in {
> - def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> - }
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
> - (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> -
> - let mayLoad = 1 in {
> - // Unaligned offset
> - def w_U : A64I_LSunalimm<size, 0b0, 0b11,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -
> - def x_U : A64I_LSunalimm<size, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -
> -
> - // Post-indexed
> - def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
> - (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - // Pre-indexed
> - def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
> - (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> -
> - def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> - }
> - } // let mayLoad = 1
> -}
> -
> -// LDRSB
> -defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
> -// LDRSH
> -defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
> -
> -// LDRSW: load a 32-bit register, sign-extending to 64-bits.
> -def LDRSWx
> - : A64I_LSunsigimm<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
> - "ldrsw\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -let mayLoad = 1 in {
> - def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
> - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -
> - def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
> - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd, ReadLd]>;
> -}
> -def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
> - (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
> -
> -
> -def LDURSWx
> - : A64I_LSunalimm<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldursw\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -def LDRSWx_PostInd
> - : A64I_LSpostind<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrsw\t$Rt, [$Rn], $SImm9",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> -}
> -
> -def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldrsw\t$Rt, [$Rn, $SImm9]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeSingleIndexedInstruction";
> -}
> -
> -//===------------------------------
> -// 2.4 Prefetch operations
> -//===------------------------------
> -
> -def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
> - "prfm\t$Rt, [$Rn, $UImm12]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"prfm $Rt, [$Rn]",
> - (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -let mayLoad = 1 in {
> - def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn,
> - GPR32:$Rm, dword_Wm_regext:$Ext),
> - "prfm\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]>;
> - def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, dword_Xm_regext:$Ext),
> - "prfm\t$Rt, [$Rn, $Rm, $Ext]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]>;
> -}
> -
> -def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
> - (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
> - GPR64:$Rm, 2)>;
> -
> -
> -def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
> - (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "prfum\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WritePreLd, ReadPreLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"prfum $Rt, [$Rn]",
> - (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
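> -// For example, "prfm pldl1keep, [x0, #8]" hints a load into L1 with the
> -// "keep" policy; prefetch_op parses the pldl1keep name into the Rt field
> -// rather than a register. The unscaled form accepts any simm9 offset, e.g.
> -// "prfum pldl1keep, [x0, #-3]".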
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store register (unprivileged) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
> -
> -// These instructions very much mirror the "unscaled immediate" loads, but since
> -// there are no floating-point variants we need to split them out into their own
> -// section to avoid instantiation of "ldtr d0, [sp]" etc.
> -
> -multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
> - string prefix> {
> - def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
> - (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
> - "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> -                   Sched<[WriteSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - }
> -
> - def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
> - (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - }
> -
> - def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -}
> -
> -// STTRB/LDTRB: First define the instructions
> -defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
> -
> -// STTRH/LDTRH
> -defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
> -
> -// STTR/LDTR to/from a W register
> -defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
> -
> -// STTR/LDTR to/from an X register
> -defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
> -
> -// Now a class for the signed instructions that can go to either 32 or 64
> -// bits...
> -multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
> - let mayLoad = 1 in {
> - def w : A64I_LSunpriv<size, 0b0, 0b11,
> - (outs GPR32:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> -
> - def x : A64I_LSunpriv<size, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]>;
> - }
> -
> - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
> -
> - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
> - (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -}
> -
> -// LDTRSB
> -defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
> -// LDTRSH
> -defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
> -
> -// And finally LDTRSW which only goes to 64 bits.
> -def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
> - (outs GPR64:$Rt),
> - (ins GPR64xsp:$Rn, simm9:$SImm9),
> - "ldtrsw\t$Rt, [$Rn, $SImm9]",
> - [], NoItinerary>,
> - Sched<[WriteLd, ReadLd]> {
> - let mayLoad = 1;
> -}
> -def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Load-store register pair (offset) instructions
> -//===----------------------------------------------------------------------===//
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register pair (post-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STP, LDP, LDPSW
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store register pair (pre-indexed) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STP, LDP, LDPSW
> -//
> -// and
> -//
> -//===----------------------------------------------------------------------===//
> -// Load-store non-temporal register pair (offset) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: STNP, LDNP
> -
> -
> -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
> -// know the access size via some means. An isolated operand does not have this
> -// information unless told from here, which means we need separate tablegen
> -// Operands for each access size. This multiclass takes care of instantiating
> -// the correct template functions in the rest of the backend.
> -
> -multiclass offsets_simm7<string MemSize, string prefix> {
> - // The bare signed 7-bit immediate is used in post-indexed instructions, but
> -  // because of the scaling performed, a generic "simm7" operand isn't
> - // appropriate here either.
> - def simm7_asmoperand : AsmOperandClass {
> - let Name = "SImm7_Scaled" # MemSize;
> - let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
> - let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
> - let DiagnosticType = "LoadStoreSImm7_" # MemSize;
> - }
> -
> - def simm7 : Operand<i64> {
> - let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
> - }
> -}
> -
> -defm word_ : offsets_simm7<"4", "word_">;
> -defm dword_ : offsets_simm7<"8", "dword_">;
> -defm qword_ : offsets_simm7<"16", "qword_">;
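> -// Concretely, the scaling gives each operand a different accepted range:
> -// MemSize 4 covers -256..252 in steps of 4, MemSize 8 covers -512..504 in
> -// steps of 8 (e.g. "ldp x0, x1, [sp, #-16]"), and MemSize 16 covers
> -// -1024..1008 in steps of 16.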
> -
> -multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
> - Operand simm7, string prefix> {
> - def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
> - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
> - "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> -             Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> - def _LDR : A64I_LSPoffset<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> - def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
> - (outs GPR64xsp:$Rn_wb),
> - (ins SomeReg:$Rt, SomeReg:$Rt2,
> - GPR64xsp:$Rn,
> - simm7:$SImm7),
> - "stp\t$Rt, $Rt2, [$Rn], $SImm7",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let Constraints = "$Rn = $Rn_wb";
> -
> - // Decoder only needed for unpredictability checking (FIXME).
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
> - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
> - "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
> - [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> -
> - def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
> - (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
> - "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
> - let mayStore = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> - def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
> - (outs SomeReg:$Rt, SomeReg:$Rt2),
> - (ins GPR64xsp:$Rn, simm7:$SImm7),
> - "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> - }
> - def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
> - (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
> - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> -}
> -
> -
> -defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
> -defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
> -
> -let Predicates = [HasFPARMv8] in {
> -defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
> -defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
> -defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
> - "LSFPPair128">;
> -}
> -
> -
> -def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
> - (outs GPR64:$Rt, GPR64:$Rt2),
> - (ins GPR64xsp:$Rn, word_simm7:$SImm7),
> - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> -}
> -def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
> - (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
> -
> -def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
> - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
> - (ins GPR64xsp:$Rn, word_simm7:$SImm7),
> - "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> -}
> -
> -def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
> - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
> - (ins GPR64xsp:$Rn, word_simm7:$SImm7),
> - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
> - [], NoItinerary>,
> - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
> - let mayLoad = 1;
> - let Constraints = "$Rn = $Rn_wb";
> - let DecoderMethod = "DecodeLDSTPairInstruction";
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Logical (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
> -
> -multiclass logical_imm_operands<string prefix, string note,
> - int size, ValueType VT> {
> - def _asmoperand : AsmOperandClass {
> - let Name = "LogicalImm" # note # size;
> - let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
> - let RenderMethod = "addLogicalImmOperands<" # size # ">";
> - let DiagnosticType = "LogicalSecondSource";
> - }
> -
> - def _operand
> - : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
> - let PrintMethod = "printLogicalImmOperand<" # size # ">";
> - let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
> - }
> -}
> -
> -defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
> -defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
> -
> -// The mov versions only differ in assembly parsing, where they
> -// exclude values representable with either MOVZ or MOVN.
> -defm logical_imm32_mov
> - : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
> -defm logical_imm64_mov
> - : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
> -
> -
> -multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
> - def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
> - (ins GPR32:$Rn, logical_imm32_operand:$Imm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
> - [(set i32:$Rd,
> - (opnode i32:$Rn, logical_imm32_operand:$Imm))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
> - (ins GPR64:$Rn, logical_imm64_operand:$Imm),
> - !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
> - [(set i64:$Rd,
> - (opnode i64:$Rn, logical_imm64_operand:$Imm))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -}
> -
> -defm AND : A64I_logimmSizes<0b00, "and", and>;
> -defm ORR : A64I_logimmSizes<0b01, "orr", or>;
> -defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
> -
> -let Defs = [NZCV] in {
> - def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
> - (ins GPR32:$Rn, logical_imm32_operand:$Imm),
> - "ands\t$Rd, $Rn, $Imm",
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -
> - def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
> - (ins GPR64:$Rn, logical_imm64_operand:$Imm),
> - "ands\t$Rd, $Rn, $Imm",
> - [], NoItinerary>,
> - Sched<[WriteALU, ReadALU]>;
> -}
> -
> -def : InstAlias<"tst $Rn, $Imm",
> - (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
> -def : InstAlias<"tst $Rn, $Imm",
> - (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
> -// FIXME: these are sometimes canonical.
> -def : InstAlias<"mov $Rd, $Imm",
> - (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm), 0>;
> -def : InstAlias<"mov $Rd, $Imm",
> - (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm), 0>;
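> -// For example, "mov w0, #0x00ff00ff" isn't representable as a MOVZ or MOVN
> -// but is a valid bitmask immediate, so it matches here and is encoded as
> -// "orr w0, wzr, #0x00ff00ff".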
> -
> -//===----------------------------------------------------------------------===//
> -// Logical (shifted register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
> -
> -// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
> -// behaves differently for unsigned comparisons, so we defensively only allow
> -// signed or n/a as the operand. In practice "unsigned greater than 0" is "not
> -// equal to 0" and LLVM gives us this.
> -def signed_cond : PatLeaf<(cond), [{
> - return !isUnsignedIntSetCC(N->get());
> -}]>;
> -
> -
> -// These instructions share their "shift" operands with add/sub (shifted
> -// register instructions). They are defined there.
> -
> -// N.b. the commutable parameter is just !N. It will be first against the wall
> -// when the revolution comes.
> -multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
> - bit N, bit commutable,
> - string asmop, SDPatternOperator opfrag, ValueType ty,
> - RegisterClass GPR, list<Register> defs> {
> - let isCommutable = commutable, Defs = defs in {
> - def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _asr : A64I_logicalshift<sf, opc, 0b10, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _ror : A64I_logicalshift<sf, opc, 0b11, N,
> - (outs GPR:$Rd),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6),
> - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
> - [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6))
> - )],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -
> - def _noshift
> - : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
> - GPR:$Rm, 0)>;
> -
> - def : Pat<(opfrag ty:$Rn, ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
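> -// As a sketch of the expansion: "defm AND : logical_sizes<...>" below yields
> -// ANDxxx_lsl/_lsr/_asr/_ror plus the ANDwww_* equivalents, so
> -// "and x0, x1, x2, lsr #4" selects ANDxxx_lsr while a plain
> -// "and x0, x1, x2" uses the _noshift alias (ANDxxx_lsl with shift 0).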
> -
> -multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
> - string asmop, SDPatternOperator opfrag,
> - list<Register> defs> {
> - defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
> - commutable, asmop, opfrag, i64, GPR64, defs>;
> - defm www : logical_shifts<prefix # "www", 0b0, opc, N,
> - commutable, asmop, opfrag, i32, GPR32, defs>;
> -}
> -
> -
> -defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
> -defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
> -defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
> -defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
> - PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
> - [{ (void)N; return false; }]>,
> - [NZCV]>;
> -
> -defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (and node:$lhs, (not node:$rhs))>, []>;
> -defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (or node:$lhs, (not node:$rhs))>, []>;
> -defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (xor node:$lhs, (not node:$rhs))>, []>;
> -defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
> - PatFrag<(ops node:$lhs, node:$rhs),
> - (and node:$lhs, (not node:$rhs)),
> - [{ (void)N; return false; }]>,
> - [NZCV]>;
> -
> -multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
> - let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
> - def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -
> - def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
> - (outs),
> - (ins GPR:$Rn, GPR:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6),
> - "tst\t$Rn, $Rm, $Imm6",
> - [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6)),
> - 0, signed_cond))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -
> - def _noshift : InstAlias<"tst $Rn, $Rm",
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
> -
> - def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
> - (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
> -}
> -
> -defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
> -defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
> -
> -
> -multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
> - let isCommutable = 0, Rn = 0b11111 in {
> - def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (shl ty:$Rm,
> - !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> -
> - def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (srl ty:$Rm,
> - !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (sra ty:$Rm,
> - !cast<Operand>("asr_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> -
> - def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
> - (outs GPR:$Rd),
> - (ins GPR:$Rm,
> - !cast<Operand>("ror_operand_" # ty):$Imm6),
> - "mvn\t$Rd, $Rm, $Imm6",
> - [(set ty:$Rd, (not (rotr ty:$Rm,
> -                                 !cast<Operand>("ror_operand_" # ty):$Imm6)))],
> - NoItinerary>,
> - Sched<[WriteALU, ReadALU, ReadALU]>;
> - }
> -
> - def _noshift : InstAlias<"mvn $Rn, $Rm",
> - (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
> -
> - def : Pat<(not ty:$Rm),
> - (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
> -}
> -
> -defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
> -defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
> -
> -def MOVxx : InstAlias<"mov $Rd, $Rm",
> -                      (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
> -def MOVww : InstAlias<"mov $Rd, $Rm",
> -                      (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
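> -// i.e. a plain register move is just an ORR with the zero register:
> -// "mov x0, x1" assembles identically to "orr x0, xzr, x1".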
> -
> -//===----------------------------------------------------------------------===//
> -// Move wide (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: MOVN, MOVZ, MOVK + MOV aliases
> -
> -// A wide variety of different relocations are needed for variants of these
> -// instructions, so it turns out that we need a different operand for all of
> -// them.
> -multiclass movw_operands<string prefix, string instname, int width> {
> - def _imm_asmoperand : AsmOperandClass {
> - let Name = instname # width # "Shifted" # shift;
> - let PredicateMethod = "is" # instname # width # "Imm";
> - let RenderMethod = "addMoveWideImmOperands";
> - let ParserMethod = "ParseImmWithLSLOperand";
> - let DiagnosticType = "MOVWUImm16";
> - }
> -
> - def _imm : Operand<i64> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
> - let PrintMethod = "printMoveWideImmOperand";
> - let EncoderMethod = "getMoveWideImmOpValue";
> - let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
> -
> - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
> - }
> -}
> -
> -defm movn32 : movw_operands<"movn32", "MOVN", 32>;
> -defm movn64 : movw_operands<"movn64", "MOVN", 64>;
> -defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
> -defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
> -defm movk32 : movw_operands<"movk32", "MOVK", 32>;
> -defm movk64 : movw_operands<"movk64", "MOVK", 64>;
> -
> -multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
> - dag ins64bit> {
> -
> - def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
> - !strconcat(asmop, "\t$Rd, $FullImm"),
> - [], NoItinerary>,
> - Sched<[WriteALU]> {
> - bits<18> FullImm;
> - let UImm16 = FullImm{15-0};
> - let Shift = FullImm{17-16};
> - }
> -
> - def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
> - !strconcat(asmop, "\t$Rd, $FullImm"),
> - [], NoItinerary>,
> - Sched<[WriteALU]> {
> - bits<18> FullImm;
> - let UImm16 = FullImm{15-0};
> - let Shift = FullImm{17-16};
> - }
> -}
> -
> -let isMoveImm = 1, isReMaterializable = 1,
> - isAsCheapAsAMove = 1, hasSideEffects = 0 in {
> - defm MOVN : A64I_movwSizes<0b00, "movn",
> - (ins movn32_imm:$FullImm),
> - (ins movn64_imm:$FullImm)>;
> -
> - // Some relocations are able to convert between a MOVZ and a MOVN. If these
> - // are applied the instruction must be emitted with the corresponding bits as
> - // 0, which means a MOVZ needs to override that bit from the default.
> - let PostEncoderMethod = "fixMOVZ" in
> - defm MOVZ : A64I_movwSizes<0b10, "movz",
> - (ins movz32_imm:$FullImm),
> - (ins movz64_imm:$FullImm)>;
> -}
> -
> -let Constraints = "$src = $Rd",
> - SchedRW = [WriteALU, ReadALU] in
> -defm MOVK : A64I_movwSizes<0b11, "movk",
> - (ins GPR32:$src, movk32_imm:$FullImm),
> - (ins GPR64:$src, movk64_imm:$FullImm)>;
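> -// The 18-bit $FullImm packs the 16-bit payload with the 2-bit hw shift, so
> -// e.g. "movz x0, #0x1234, lsl #16" has UImm16 = 0x1234 and Shift = 0b01,
> -// while "movk x0, #0xbeef, lsl #32" merges 16 bits into an existing value
> -// (hence the "$src = $Rd" constraint above).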
> -
> -
> -// And now the "MOV" aliases. These also need their own operands because what
> -// they accept is completely different to what the base instructions accept.
> -multiclass movalias_operand<string prefix, string basename,
> - string immpredicate, int width> {
> - def _asmoperand : AsmOperandClass {
> - let Name = basename # width # "MovAlias";
> - let PredicateMethod
> - = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
> - let RenderMethod
> - = "addMoveWideMovAliasOperands<" # width # ", "
> - # "A64Imms::" # immpredicate # ">";
> - }
> -
> - def _movimm : Operand<i64> {
> - let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
> -
> - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
> - }
> -}
> -
> -defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
> -defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
> -defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
> -defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
> -
> -// FIXME: these are officially canonical aliases, but TableGen is too limited to
> -// print them at the moment. I believe in this case an "AliasPredicate" method
> -// will need to be implemented to allow it, as well as the more generally
> -// useful handling of non-register, non-constant operands.
> -class movalias<Instruction INST, RegisterClass GPR, Operand operand>
> - : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm), 0>;
> -
> -def : movalias<MOVZwii, GPR32, movz32_movimm>;
> -def : movalias<MOVZxii, GPR64, movz64_movimm>;
> -def : movalias<MOVNwii, GPR32, movn32_movimm>;
> -def : movalias<MOVNxii, GPR64, movn64_movimm>;
> -
> -def movw_addressref_g0 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<0>">;
> -def movw_addressref_g1 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<1>">;
> -def movw_addressref_g2 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<2>">;
> -def movw_addressref_g3 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<3>">;
> -
> -def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2,
> - movw_addressref_g1:$G1, movw_addressref_g0:$G0),
> - (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3),
> - movw_addressref_g2:$G2),
> - movw_addressref_g1:$G1),
> - movw_addressref_g0:$G0)>;
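> -// In assembly this is the usual large-model materialisation sequence,
> -// roughly:
> -//   movz x0, #:abs_g3:sym
> -//   movk x0, #:abs_g2_nc:sym
> -//   movk x0, #:abs_g1_nc:sym
> -//   movk x0, #:abs_g0_nc:sym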
> -
> -//===----------------------------------------------------------------------===//
> -// PC-relative addressing instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: ADR, ADRP
> -
> -def adr_label : Operand<i64> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
> -
> - // This label is a 21-bit offset from PC, unscaled
> - let PrintMethod = "printLabelOperand<21, 1>";
> - let ParserMatchClass = label_asmoperand<21, 1>;
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -def adrp_label_asmoperand : AsmOperandClass {
> - let Name = "AdrpLabel";
> - let RenderMethod = "addLabelOperands<21, 4096>";
> - let DiagnosticType = "Label";
> -}
> -
> -def adrp_label : Operand<i64> {
> - let EncoderMethod = "getAdrpLabelOpValue";
> -
> - // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
> - let PrintMethod = "printLabelOperand<21, 4096>";
> - let ParserMatchClass = adrp_label_asmoperand;
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -let hasSideEffects = 0 in {
> - def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
> - "adr\t$Rd, $Label", [], NoItinerary>,
> - Sched<[WriteALUs]>;
> -
> - def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
> - "adrp\t$Rd, $Label", [], NoItinerary>,
> - Sched<[WriteALUs]>;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// System instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
> -// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
> -
> -// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
> -def uimm3_asmoperand : AsmOperandClass {
> - let Name = "UImm3";
> - let PredicateMethod = "isUImm<3>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm3";
> -}
> -
> -def uimm3 : Operand<i32> {
> - let ParserMatchClass = uimm3_asmoperand;
> -}
> -
> -// The HINT alias can accept a simple unsigned 7-bit immediate.
> -def uimm7_asmoperand : AsmOperandClass {
> - let Name = "UImm7";
> - let PredicateMethod = "isUImm<7>";
> - let RenderMethod = "addImmOperands";
> - let DiagnosticType = "UImm7";
> -}
> -
> -def uimm7 : Operand<i32> {
> - let ParserMatchClass = uimm7_asmoperand;
> -}
> -
> -// The namedimm multiclass is defined with the prefetch operands. Most of these fit
> -// into the NamedImmMapper scheme well: they either accept a named operand or
> -// any immediate under a particular value (which may be 0, implying no immediate
> -// is allowed).
> -defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
> -defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
> -defm ic : namedimm<"ic", "A64IC::ICMapper">;
> -defm dc : namedimm<"dc", "A64DC::DCMapper">;
> -defm at : namedimm<"at", "A64AT::ATMapper">;
> -defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
> -
> -// However, MRS and MSR are more complicated for a few reasons:
> -// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
> -// implementation-defined effect
> -// * Most registers are shared, but some are read-only or write-only.
> -// * There is a variant of MSR which accepts the same register name (SPSel),
> -// but which would have a different encoding.
> -
> -// In principle these could be resolved with more complicated subclasses of
> -// NamedImmMapper, but that imposes an overhead on the other "named
> -// immediates", both concretely (virtual tables) and in unnecessary
> -// abstraction.
> -
> -// The solution adopted here is to take the MRS/MSR Mappers out of the usual
> -// hierarchy (they're not derived from NamedImmMapper) and to add logic for
> -// their special situation.
> -def mrs_asmoperand : AsmOperandClass {
> - let Name = "MRS";
> - let ParserMethod = "ParseSysRegOperand";
> - let DiagnosticType = "MRS";
> -}
> -
> -def mrs_op : Operand<i32> {
> - let ParserMatchClass = mrs_asmoperand;
> - let PrintMethod = "printMRSOperand";
> - let DecoderMethod = "DecodeMRSOperand";
> -}
> -
> -def msr_asmoperand : AsmOperandClass {
> - let Name = "MSRWithReg";
> -
> - // Note that SPSel is valid for both this and the pstate operands, but with
> - // different immediate encodings. This is why these operands provide a string
> - // AArch64Operand rather than an immediate. The overlap is small enough that
> - // it could be resolved with hackery now, but who can say in future?
> - let ParserMethod = "ParseSysRegOperand";
> - let DiagnosticType = "MSR";
> -}
> -
> -def msr_op : Operand<i32> {
> - let ParserMatchClass = msr_asmoperand;
> - let PrintMethod = "printMSROperand";
> - let DecoderMethod = "DecodeMSROperand";
> -}
> -
> -def pstate_asmoperand : AsmOperandClass {
> - let Name = "MSRPState";
> - // See comment above about parser.
> - let ParserMethod = "ParseSysRegOperand";
> - let DiagnosticType = "MSR";
> -}
> -
> -def pstate_op : Operand<i32> {
> - let ParserMatchClass = pstate_asmoperand;
> - let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
> - let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
> -}
> -
> -// When <CRn> is specified, an assembler should accept something like "C4", not
> -// the usual "#4" immediate.
> -def CRx_asmoperand : AsmOperandClass {
> - let Name = "CRx";
> - let PredicateMethod = "isUImm<4>";
> - let RenderMethod = "addImmOperands";
> - let ParserMethod = "ParseCRxOperand";
> - // Diagnostics are handled in all cases by ParseCRxOperand.
> -}
> -
> -def CRx : Operand<i32> {
> - let ParserMatchClass = CRx_asmoperand;
> - let PrintMethod = "printCRxOperand";
> -}
> -
> -
> -// Finally, we can start defining the instructions.
> -
> -// HINT is straightforward, with a few aliases.
> -def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
> - [], NoItinerary> {
> - bits<7> UImm7;
> - let CRm = UImm7{6-3};
> - let Op2 = UImm7{2-0};
> -
> - let Op0 = 0b00;
> - let Op1 = 0b011;
> - let CRn = 0b0010;
> - let Rt = 0b11111;
> -}
> -
> -def : InstAlias<"nop", (HINTi 0)>;
> -def : InstAlias<"yield", (HINTi 1)>;
> -def : InstAlias<"wfe", (HINTi 2)>;
> -def : InstAlias<"wfi", (HINTi 3)>;
> -def : InstAlias<"sev", (HINTi 4)>;
> -def : InstAlias<"sevl", (HINTi 5)>;
> -
> -// Quite a few instructions then follow a similar pattern of fixing common
> -// fields in the bitpattern, so we'll define a helper class for them.
> -class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
> - Operand operand, string asmop>
> - : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
> - [], NoItinerary> {
> - let Op0 = op0;
> - let Op1 = op1;
> - let CRn = crn;
> - let Op2 = op2;
> - let Rt = 0b11111;
> -}
> -
> -
> -def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
> -def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
> -def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
> -def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
> -
> -def : InstAlias<"clrex", (CLREXi 0b1111)>;
> -def : InstAlias<"isb", (ISBi 0b1111)>;
> -
> -// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for Linux SMP
> -// configurations at least.
> -def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
> -
> -// Any SYS bitpattern can be represented with a complex and opaque "SYS"
> -// instruction.
> -def SYSiccix : A64I_system<0b0, (outs),
> - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
> - uimm3:$Op2, GPR64:$Rt),
> - "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
> - [], NoItinerary> {
> - let Op0 = 0b01;
> -}
> -
> -// The Xt argument to the generic SYS instruction can be omitted, whether or
> -// not doing so makes sense for the particular operation.
> -def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
> - (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
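> -// For example, the cache-maintenance alias "ic iallu" (defined below) has
> -// the same encoding as the generic "sys #0, c7, c5, #0".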
> -
> -
> -// But many SYS operations have aliases, which obviously don't fit into the
> -// generic form above:
> -class SYSalias<dag ins, string asmstring>
> - : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
> - let isAsmParserOnly = 1;
> -
> - bits<14> SysOp;
> - let Op0 = 0b01;
> - let Op1 = SysOp{13-11};
> - let CRn = SysOp{10-7};
> - let CRm = SysOp{6-3};
> - let Op2 = SysOp{2-0};
> -}
> -
> -def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
> -
> -def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
> - let Rt = 0b11111;
> -}
> -
> -def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
> -def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
> -
> -def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
> -
> -def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
> - let Rt = 0b11111;
> -}
> -
> -
> -def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
> - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
> - "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
> - [], NoItinerary> {
> - let Op0 = 0b01;
> -}
> -
> -// The instructions themselves are rather simple for MSR and MRS.
> -def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
> - "msr\t$SysReg, $Rt", [], NoItinerary> {
> - bits<16> SysReg;
> - let Op0 = SysReg{15-14};
> - let Op1 = SysReg{13-11};
> - let CRn = SysReg{10-7};
> - let CRm = SysReg{6-3};
> - let Op2 = SysReg{2-0};
> -}
> -
> -def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
> - "mrs\t$Rt, $SysReg", [], NoItinerary> {
> - bits<16> SysReg;
> - let Op0 = SysReg{15-14};
> - let Op1 = SysReg{13-11};
> - let CRn = SysReg{10-7};
> - let CRm = SysReg{6-3};
> - let Op2 = SysReg{2-0};
> -}
> -
> -def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
> - "msr\t$PState, $CRm", [], NoItinerary> {
> - bits<6> PState;
> -
> - let Op0 = 0b00;
> - let Op1 = PState{5-3};
> - let CRn = 0b0100;
> - let Op2 = PState{2-0};
> - let Rt = 0b11111;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Test & branch (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: TBZ, TBNZ
> -
> -// The bit to test is a simple unsigned 6-bit immediate in the X-register
> -// versions.
> -def uimm6 : Operand<i64> {
> - let ParserMatchClass = uimm6_asmoperand;
> -}
> -
> -def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
> -
> -def tbimm_target : Operand<OtherVT> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
> -
> - // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<14, 4>";
> - let ParserMatchClass = label_wid14_scal4_asmoperand;
> -
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
> -def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
> -
> -// These instructions correspond to patterns involving "and" with a power of
> -// two, which we need to be able to select.
> -def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
> -def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
> -
> -let isBranch = 1, isTerminator = 1 in {
> - def TBZxii : A64I_TBimm<0b0, (outs),
> - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
> - "tbz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
> - A64eq, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]>;
> -
> - def TBNZxii : A64I_TBimm<0b1, (outs),
> - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
> - "tbnz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
> - A64ne, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]>;
> -
> -
> -  // Note that these instructions overlap with the 64-bit patterns above. This
> -  // is intentional: "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" do the
> -  // same thing and are both valid assembly. They also both have
> - // sensible DAG patterns.
> - def TBZwii : A64I_TBimm<0b0, (outs),
> - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
> - "tbz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
> - A64eq, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]> {
> - let Imm{5} = 0b0;
> - }
> -
> - def TBNZwii : A64I_TBimm<0b1, (outs),
> - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
> - "tbnz\t$Rt, $Imm, $Label",
> - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
> - A64ne, bb:$Label)],
> - NoItinerary>,
> - Sched<[WriteBr]> {
> - let Imm{5} = 0b0;
> - }
> -}
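> -// As an example of the patterns above, source like
> -//   if ((x & 0x20) == 0) goto out;
> -// can be selected straight to "tbz w0, #5, out": tstb32_pat recognises the
> -// power-of-two mask and supplies the bit number as the immediate.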
> -
> -//===----------------------------------------------------------------------===//
> -// Unconditional branch (immediate) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: B, BL
> -
> -def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
> -
> -def bimm_target : Operand<OtherVT> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
> -
> - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<26, 4>";
> - let ParserMatchClass = label_wid26_scal4_asmoperand;
> -
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -def blimm_target : Operand<i64> {
> - let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
> -
> - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
> - let PrintMethod = "printLabelOperand<26, 4>";
> - let ParserMatchClass = label_wid26_scal4_asmoperand;
> -
> - let OperandType = "OPERAND_PCREL";
> -}
> -
> -class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
> - : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
> - !strconcat(asmop, "\t$Label"), patterns,
> - NoItinerary>,
> - Sched<[WriteBr]>;
> -
> -let isBranch = 1 in {
> - def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
> - let isTerminator = 1;
> - let isBarrier = 1;
> - }
> -
> - let SchedRW = [WriteBrL] in {
> - def BLimm : A64I_BimmImpl<0b1, "bl",
> - [(AArch64Call tglobaladdr:$Label)], blimm_target> {
> - let isCall = 1;
> - let Defs = [X30];
> - }
> - }
> -}
> -
> -def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Unconditional branch (register) instructions
> -//===----------------------------------------------------------------------===//
> -// Contains: BR, BLR, RET, ERET, DRPS.
> -
> -// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
> -// at the moment.
> -class A64I_BregImpl<bits<4> opc,
> - dag outs, dag ins, string asmstr, list<dag> patterns,
> - InstrItinClass itin = NoItinerary>
> - : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
> - outs, ins, asmstr, patterns, itin>,
> - Sched<[WriteBr]> {
> - let isBranch = 1;
> - let isIndirectBranch = 1;
> -}
> -
> -// Note that these are not marked isCall or isReturn because as far as LLVM is
> -// concerned they're not. "ret" is just another jump unless it has been selected
> -// by LLVM as the function's return.
> -
> -let isBranch = 1 in {
> - def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
> - "br\t$Rn", [(brind i64:$Rn)]> {
> - let isBarrier = 1;
> - let isTerminator = 1;
> - }
> -
> - let SchedRW = [WriteBrL] in {
> - def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
> - "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
> - let isBarrier = 0;
> - let isCall = 1;
> - let Defs = [X30];
> - }
> - }
> -
> - def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
> - "ret\t$Rn", []> {
> - let isBarrier = 1;
> - let isTerminator = 1;
> - let isReturn = 1;
> - }
> -
> - // Create a separate pseudo-instruction for codegen to use so that we don't
> - // flag x30 as used in every function. It'll be restored before the RET by the
> - // epilogue if it's legitimately used.
> - def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
> - let isTerminator = 1;
> - let isBarrier = 1;
> - let isReturn = 1;
> - }
> -
> - def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
> - let Rn = 0b11111;
> - let isBarrier = 1;
> - let isTerminator = 1;
> - let isReturn = 1;
> - }
> -
> - def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
> - let Rn = 0b11111;
> - let isBarrier = 1;
> - }
> -}
> -
> -def RETAlias : InstAlias<"ret", (RETx X30)>;
> -
> -
> -//===----------------------------------------------------------------------===//
> -// Address generation patterns
> -//===----------------------------------------------------------------------===//
> -
> -// Primary method of address generation for the small/absolute memory model is
> -// an ADRP/ADR pair:
> -// ADRP x0, some_variable
> -// ADD x0, x0, #:lo12:some_variable
> -//
> -// The load/store elision of the ADD is accomplished when selecting
> -// addressing-modes. This just mops up the cases where that doesn't work and we
> -// really need an address in some register.
> -
> -// This wrapper applies a LO12 modifier to the address. Otherwise we could just
> -// use the same address.
> -
> -class ADRP_ADD<SDNode Wrapper, SDNode addrop>
> - : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
> - (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
> -
> -def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
> -def : ADRP_ADD<A64WrapperSmall, texternalsym>;
> -def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
> -def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
> -def : ADRP_ADD<A64WrapperSmall, tjumptable>;
> -def : ADRP_ADD<A64WrapperSmall, tconstpool>;
> -
> -//===----------------------------------------------------------------------===//
> -// GOT access patterns
> -//===----------------------------------------------------------------------===//
> -
> -class GOTLoadSmall<SDNode addrfrag>
> - : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
> - (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
> -
> -def : GOTLoadSmall<texternalsym>;
> -def : GOTLoadSmall<tglobaladdr>;
> -def : GOTLoadSmall<tglobaltlsaddr>;
> -
> -//===----------------------------------------------------------------------===//
> -// Tail call handling
> -//===----------------------------------------------------------------------===//
> -
> -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
> - def TC_RETURNdi
> - : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
> - [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
> -
> - def TC_RETURNxi
> - : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
> - [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
> -}
> -
> -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
> - Uses = [XSP] in {
> - def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
> - (Bimm bimm_target:$Label)>;
> -
> - def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
> - (BRx GPR64:$Rd)>;
> -}
> -
> -
> -def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
> - (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Thread local storage
> -//===----------------------------------------------------------------------===//
> -
> -// This is a pseudo-instruction representing the ".tlsdesccall" directive in
> -// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
> -// current location. It should always be immediately followed by a BLR
> -// instruction, and is intended solely for relaxation by the linker.
> -
> -def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
> -
> -def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
> - let hasSideEffects = 1;
> -}
> -
> -def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
> - [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
> - let isCall = 1;
> - let Defs = [X30];
> -}
> -
> -def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
> - (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
> -
> -//===----------------------------------------------------------------------===//
> -// Bitfield patterns
> -//===----------------------------------------------------------------------===//
> -
> -def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
> -}]>;
> -
> -def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
> -}]>;
> -
> -def bfi_width_to_imms : SDNodeXForm<imm, [{
> - return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
> -}]>;
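> -// Worked example: inserting an 8-bit field at bit 16 of a W register
> -// (lsb = 16, width = 8) gives ImmR = (32 - 16) % 32 = 16 and
> -// ImmS = 8 - 1 = 7, i.e. "bfi w0, w1, #16, #8".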
> -
> -
> -// The simpler patterns deal with cases where no AND mask is actually needed
> -// (either all bits are used or the low 32 bits are used).
> -let AddedComplexity = 10 in {
> -
> -def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
> - (BFIxxii $src, $Rn,
> - (bfi64_lsb_to_immr (i64 imm:$ImmR)),
> - (bfi_width_to_imms (i64 imm:$ImmS)))>;
> -
> -def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
> - (BFIwwii $src, $Rn,
> - (bfi32_lsb_to_immr (i64 imm:$ImmR)),
> - (bfi_width_to_imms (i64 imm:$ImmS)))>;
> -
> -
> -def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
> - (i64 4294967295)),
> - (SUBREG_TO_REG (i64 0),
> - (BFIwwii (EXTRACT_SUBREG $src, sub_32),
> - (EXTRACT_SUBREG $Rn, sub_32),
> - (bfi32_lsb_to_immr (i64 imm:$ImmR)),
> - (bfi_width_to_imms (i64 imm:$ImmS))),
> - sub_32)>;
> -
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Miscellaneous patterns
> -//===----------------------------------------------------------------------===//
> -
> -// Truncation from 64 to 32-bits just involves renaming your register.
> -def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
> -
> -// Similarly, extension where we don't care about the high bits is
> -// just a rename.
> -def : Pat<(i64 (anyext i32:$val)),
> - (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
> -
> -// SELECT instructions providing f128 types need to be handled by a
> -// pseudo-instruction since the eventual code will need to introduce basic
> -// blocks and control flow.
> -def F128CSEL : PseudoInst<(outs FPR128:$Rd),
> - (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
> - [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
> - let Uses = [NZCV];
> - let usesCustomInserter = 1;
> -}
> -
> -//===----------------------------------------------------------------------===//
> -// Load/store patterns
> -//===----------------------------------------------------------------------===//
> -
> -// There are lots of patterns here, because we need to allow at least three
> -// parameters to vary independently.
> -// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
> -// 2. LLVM source: zextloadi8, anyextloadi8, ...
> -// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
> -//
> -// The biggest problem turns out to be the address-generation variable. At the
> -// point of instantiation we need to produce two DAGs, one for the pattern and
> -// one for the instruction. Doing this at the lowest level of classes doesn't
> -// work.
> -//
> -// Consider the simple uimm12 addressing mode, and the desire to match both (add
> -// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
> -// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
> -// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
> -// operation, and PatFrags are for selection, not output.
> -//
> -// As a result, the address-generation patterns are the final
> -// instantiations. However, we do still need to vary the operand for the address
> -// further down (at the point we're deciding A64WrapperSmall, we don't know
> -// the memory width of the operation).
> -
> -//===------------------------------
> -// 1. Basic infrastructural defs
> -//===------------------------------
> -
> -// First, some simple classes for !foreach and !subst to use:
> -class Decls {
> - dag pattern;
> -}
> -
> -def decls : Decls;
> -def ALIGN;
> -def INST;
> -def OFFSET;
> -def SHIFT;
> -
> -// You can't use !subst on an actual immediate, but you *can* use it on an
> -// operand record that happens to match a single immediate. So we do.
> -def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
> -def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
> -def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
> -def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
> -def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
> -
> -// If the low bits of a pointer are known to be 0 then an "or" is just as good
> -// as addition for computing an offset. This fragment forwards that check for
> -// TableGen's use.
> -def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
> -[{
> - return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
> -}]>;
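> -// For example, if %ptr is known to be 8-byte aligned then (or %ptr, 4)
> -// computes %ptr + 4, so the base+offset load/store patterns below can be
> -// reused unchanged for the "or" form.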
> -
> -// Load/store (unsigned immediate) operations with relocations against global
> -// symbols (for lo12) are only valid if those symbols have correct alignment
> -// (since the immediate offset is divided by the access scale, it can't have a
> -// remainder).
> -//
> -// The guaranteed alignment is provided as part of the WrapperSmall
> -// operation, and checked against one of these.
> -def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
> -def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
> -def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
> -def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
> -def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
> -
> -// "Normal" load/store instructions can be used on atomic operations, provided
> -// the ordering parameter is at most "monotonic". Anything above that needs
> -// special handling with acquire/release instructions.
> -class simple_load<PatFrag base>
> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{
> - return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
> -}]>;
> -
> -def atomic_load_simple_i8 : simple_load<atomic_load_8>;
> -def atomic_load_simple_i16 : simple_load<atomic_load_16>;
> -def atomic_load_simple_i32 : simple_load<atomic_load_32>;
> -def atomic_load_simple_i64 : simple_load<atomic_load_64>;
> -
> -class simple_store<PatFrag base>
> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
> - return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
> -}]>;
> -
> -def atomic_store_simple_i8 : simple_store<atomic_store_8>;
> -def atomic_store_simple_i16 : simple_store<atomic_store_16>;
> -def atomic_store_simple_i32 : simple_store<atomic_store_32>;
> -def atomic_store_simple_i64 : simple_store<atomic_store_64>;
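> -// Thus a monotonic atomic load of i32 selects the ordinary 32-bit LDR via
> -// the patterns below; acquire/release orderings instead use the dedicated
> -// LDAR/STLR definitions elsewhere in this file.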
> -
> -//===------------------------------
> -// 2. UImm12 and SImm9
> -//===------------------------------
> -
> -// These instructions have two operands providing the address so they can be
> -// treated similarly for most purposes.
> -
> -//===------------------------------
> -// 2.1 Base patterns covering extend/truncate semantics
> -//===------------------------------
> -
> -// Atomic patterns can be shared between integer operations of all sizes; a
> -// quick multiclass here allows reuse.
> -multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag address, ValueType transty,
> - ValueType sty> {
> - def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
> - (LOAD Base, Offset)>;
> -
> - def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
> - (STORE $Rt, Base, Offset)>;
> -}
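
(Spelling out the !cast concatenation for one case: with sty = i8, and
writing Base/Offset/address for the dags the caller passes in, e.g. the
LS8_LDR/LS8_STR instantiation from section 2.2 below, the two Pats come
out as:)

  def : Pat<(atomic_load_simple_i8 address), (LS8_LDR Base, Offset)>;
  def : Pat<(atomic_store_simple_i8 address, i32:$Rt),
            (LS8_STR $Rt, Base, Offset)>;
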
> -
> -// Instructions accessing a memory chunk smaller than a register (or, in a
> -// pinch, the same size) have a characteristic set of patterns they want to
> -// match: extending loads and truncating stores. This class deals with the
> -// sign-neutral version of those patterns.
> -//
> -// It will be instantiated across multiple addressing-modes.
> -multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset,
> - dag address, ValueType sty>
> - : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
> - def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
> -
> - def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
> -
> - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
> - // register was actually set.
> - def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
> -
> - def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
> -
> - def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
> - (STORE $Rt, Base, Offset)>;
> -
> - // For truncating store from 64-bits, we have to manually tell LLVM to
> - // ignore the high bits of the x register.
> - def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
> - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
> -}
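
(The SUBREG_TO_REG dance, hand-expanded for sty = i8 with LOAD/Base/Offset
standing for the parameters: a load into a W register already zeroes bits
63:32 of the corresponding X register, and SUBREG_TO_REG is how the
pattern tells LLVM that, so no separate extend gets emitted:)

  def : Pat<(i64 (zextloadi8 address)),
            (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
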
> -
> -// Next come patterns for sign-extending loads.
> -multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
> - dag address, ValueType sty> {
> - def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
> -
> - def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
> -
> -}
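
(With T = "B" and U = "" as in the uimm12 instantiation in section 2.2,
the two !cast strings resolve to LDRSBw and LDRSBx, i.e. hand-expanded:)

  def : Pat<(i32 (sextloadi8 address)), (LDRSBw Base, Offset)>;
  def : Pat<(i64 (sextloadi8 address)), (LDRSBx Base, Offset)>;
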
> -
> -// And finally, the "natural-width" loads and stores.
> -multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag address, ValueType sty> {
> - def : Pat<(sty (load address)), (LOAD Base, Offset)>;
> - def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
> -}
> -
> -// Integer operations also get atomic instructions to select for.
> -multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag address, ValueType sty>
> - : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
> - ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
> -
> -//===------------------------------
> -// 2.2. Addressing-mode instantiations
> -//===------------------------------
> -
> -multiclass uimm12_pats<dag address, dag Base, dag Offset> {
> - defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, byte_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, byte_uimm12,
> - !subst(ALIGN, any_align, decls.pattern))),
> - i8>;
> - defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, hword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, hword_uimm12,
> - !subst(ALIGN, min_align2, decls.pattern))),
> - i16>;
> - defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern))),
> - i32>;
> -
> - defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern))),
> - i32>;
> -
> - defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, dword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, dword_uimm12,
> - !subst(ALIGN, min_align8, decls.pattern))),
> - i64>;
> -
> - defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, hword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, hword_uimm12,
> - !subst(ALIGN, min_align2, decls.pattern))),
> - f16>;
> -
> - defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern))),
> - f32>;
> -
> - defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, dword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, dword_uimm12,
> - !subst(ALIGN, min_align8, decls.pattern))),
> - f64>;
> -
> - defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, qword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, qword_uimm12,
> - !subst(ALIGN, min_align16, decls.pattern))),
> - f128>;
> -
> - defm : load_signed_pats<"B", "", Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, byte_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, byte_uimm12,
> - !subst(ALIGN, any_align, decls.pattern))),
> - i8>;
> -
> - defm : load_signed_pats<"H", "", Base,
> - !foreach(decls.pattern, Offset,
> - !subst(OFFSET, hword_uimm12, decls.pattern)),
> - !foreach(decls.pattern, address,
> - !subst(OFFSET, hword_uimm12,
> - !subst(ALIGN, min_align2, decls.pattern))),
> - i16>;
> -
> - def : Pat<(sextloadi32 !foreach(decls.pattern, address,
> - !subst(OFFSET, word_uimm12,
> - !subst(ALIGN, min_align4, decls.pattern)))),
> - (LDRSWx Base, !foreach(decls.pattern, Offset,
> - !subst(OFFSET, word_uimm12, decls.pattern)))>;
> -}
> -
> -// Straightforward patterns of last resort: a pointer with or without an
> -// appropriate offset.
> -defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
> -defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
> - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
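
(Hand-tracing one arm to make the !foreach/!subst machinery concrete: for
the i16 case of the instantiation just above, OFFSET becomes hword_uimm12
and ALIGN simply never appears in this address, so ls_small_pats
effectively receives:)

  defm : ls_small_pats<LS16_LDR, LS16_STR,
                       (i64 i64:$Rn),
                       (i64 hword_uimm12:$UImm12),
                       (add i64:$Rn, hword_uimm12:$UImm12),
                       i16>;
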
> -
> -// The offset could be hidden behind an "or", of course:
> -defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
> - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
> -
> -// Global addresses under the small-absolute model should use these
> -// instructions. There are ELF relocations specifically for this purpose.
> -defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
> - (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
> -
> -defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
> - ALIGN),
> - (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
> -
> -// External symbols that make it this far should also get standard relocations.
> -defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
> - ALIGN),
> - (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
> -
> -defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
> - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
> -
> -// For now, we also want to use uimm12 instructions for local variables.
> -def tframeindex_XFORM : SDNodeXForm<frameindex, [{
> - int FI = cast<FrameIndexSDNode>(N)->getIndex();
> - return CurDAG->getTargetFrameIndex(FI, MVT::i64);
> -}]>;
> -
> -defm : uimm12_pats<(i64 frameindex:$Rn),
> - (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
> -
> -// These can be much simpler than uimm12 because we don't have to change the
> -// operand type (e.g. LDURB and LDURH take the same operands).
> -multiclass simm9_pats<dag address, dag Base, dag Offset> {
> - defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
> - defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
> -
> - defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
> - defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
> -
> - defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
> - defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
> - defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
> - defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
> - f128>;
> -
> - def : Pat<(i64 (zextloadi32 address)),
> - (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
> -
> - def : Pat<(truncstorei32 i64:$Rt, address),
> - (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
> -
> - defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
> - defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
> - def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
> -}
> -
> -defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
> - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
> -
> -defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
> - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
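
(On when these fire instead of the uimm12 patterns: the scaled unsigned
forms can't encode negative or non-multiple-of-scale offsets, so those
addresses fall through to the unscaled LDUR/STUR family. An illustrative
trace of mine, with SDXF_simm9 doing the operand conversion:)

  // A load at base - 8 fails every uimm12 predicate (offset is negative),
  // so the simm9 pattern matches and selects the unscaled form, roughly:
  //   (i64 (load (add i64:$Rn, -8)))  -->  (LS64_LDUR $Rn, -8)
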
> -
> -
> -//===------------------------------
> -// 3. Register offset patterns
> -//===------------------------------
> -
> -// Atomic patterns can be shared between integer operations of all sizes; a
> -// quick multiclass here allows reuse.
> -multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
> - dag Offset, dag Extend, dag address,
> - ValueType transty, ValueType sty> {
> - def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
> - (LOAD Base, Offset, Extend)>;
> -
> - def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
> - (STORE $Rt, Base, Offset, Extend)>;
> -}
> -
> -// The register-offset instructions take three operands giving the address,
> -// and have an annoying split between instructions where Rm is 32-bit and
> -// 64-bit, so we need a special hierarchy to describe them. Other than that,
> -// the same operations should be supported as for simm9 and uimm12 addressing.
> -
> -multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset, dag Extend,
> - dag address, ValueType sty>
> - : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
> - def : Pat<(!cast<SDNode>(zextload # sty) address),
> - (LOAD Base, Offset, Extend)>;
> -
> - def : Pat<(!cast<SDNode>(extload # sty) address),
> - (LOAD Base, Offset, Extend)>;
> -
> - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
> - // register was actually set.
> - def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
> -
> - def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
> - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
> -
> - def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
> - (STORE $Rt, Base, Offset, Extend)>;
> -
> - // For truncating store from 64-bits, we have to manually tell LLVM to
> - // ignore the high bits of the x register.
> - def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
> - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
> -
> -}
> -
> -// Next come patterns for sign-extending loads.
> -multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
> - dag address, ValueType sty> {
> - def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
> - Base, Offset, Extend)>;
> -
> - def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
> - (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
> - Base, Offset, Extend)>;
> -}
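
(Same concatenation game as before: with T = "B" and Rm = "Wm" these
resolve to LDRSBw_Wm_RegOffset and LDRSBx_Wm_RegOffset, i.e.:)

  def : Pat<(i32 (sextloadi8 address)),
            (LDRSBw_Wm_RegOffset Base, Offset, Extend)>;
  def : Pat<(i64 (sextloadi8 address)),
            (LDRSBx_Wm_RegOffset Base, Offset, Extend)>;
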
> -
> -// And finally, the "natural-width" loads and stores.
> -multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset, dag Extend, dag address,
> - ValueType sty> {
> - def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
> - def : Pat<(store sty:$Rt, address),
> - (STORE $Rt, Base, Offset, Extend)>;
> -}
> -
> -multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
> - dag Base, dag Offset, dag Extend, dag address,
> - ValueType sty>
> - : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
> - ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
> -
> -multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
> - dag Extend> {
> - defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq0, decls.pattern)),
> - i8>;
> - defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq1, decls.pattern)),
> - i16>;
> - defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern)),
> - i32>;
> -
> - defm : ro_int_neutral_pats<
> - !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern)),
> - i32>;
> -
> - defm : ro_int_neutral_pats<
> - !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq3, decls.pattern)),
> - i64>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq1, decls.pattern)),
> - f16>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern)),
> - f32>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq3, decls.pattern)),
> - f64>;
> -
> - defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
> - !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
> - Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq4, decls.pattern)),
> - f128>;
> -
> - defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq0, decls.pattern)),
> - i8>;
> -
> - defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
> - !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq1, decls.pattern)),
> - i16>;
> -
> - def : Pat<(sextloadi32 !foreach(decls.pattern, address,
> - !subst(SHIFT, imm_eq2, decls.pattern))),
> - (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
> - Base, Offset, Extend)>;
> -}
> -
> -
> -// Finally we're in a position to tell LLVM exactly what addresses are reachable
> -// using register-offset instructions. Essentially a base plus a possibly
> -// extended, possibly shifted (by access size) offset.
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
> -
> -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
> - (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
> -
> -defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
> - (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
> -
> -defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
> - (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
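
(One nit while it's in front of me: the bare extend immediates above are
opaque. Inferred from the pairings, not verified against the encodings:)

  // Apparent meaning of the extend operands; the low bit says the offset
  // is scaled by the access size (the shl forms):
  //   (i64 6) / (i64 7)  sign-extended i32 offset (SXTW), unscaled/scaled
  //   (i64 2) / (i64 3)  zero-extended i32 or plain i64 offset, unscaled/scaled
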
> -
> -//===----------------------------------------------------------------------===//
> -// Advanced SIMD (NEON) Support
> -//
> -
> -include "AArch64InstrNEON.td"
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits